Spaces:
yuantao-infini-ai
committed
Commit cf1798b • 1 Parent(s): 4f617e5
Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.
- README.md +3 -9
- __init__.py +0 -0
- __pycache__/__init__.cpython-310.pyc +0 -0
- __pycache__/__init__.cpython-311.pyc +0 -0
- __pycache__/api_provider.cpython-310.pyc +0 -0
- __pycache__/base_model_worker.cpython-310.pyc +0 -0
- __pycache__/cli.cpython-310.pyc +0 -0
- __pycache__/cli.cpython-311.pyc +0 -0
- __pycache__/controller.cpython-310.pyc +0 -0
- __pycache__/gradio_web_server.cpython-310.pyc +0 -0
- __pycache__/inference.cpython-310.pyc +0 -0
- __pycache__/model_worker.cpython-310.pyc +0 -0
- __pycache__/test_message.cpython-310.pyc +0 -0
- api_provider.py +130 -0
- base_model_worker.py +239 -0
- cli.py +313 -0
- controller.py +348 -0
- gateway/README.md +57 -0
- gateway/nginx.conf +97 -0
- gradio_block_arena_anony.py +608 -0
- gradio_block_arena_named.py +458 -0
- gradio_web_server.py +883 -0
- gradio_web_server_multi.py +270 -0
- huggingface_api.py +73 -0
- huggingface_api_worker.py +391 -0
- inference.py +596 -0
- launch_all_serve.py +284 -0
- model_worker.py +363 -0
- monitor/basic_stats.py +210 -0
- monitor/clean_battle_data.py +269 -0
- monitor/clean_chat_data.py +171 -0
- monitor/dataset_release_scripts/arena_33k/count_unique_users.py +25 -0
- monitor/dataset_release_scripts/arena_33k/filter_bad_conv.py +155 -0
- monitor/dataset_release_scripts/arena_33k/merge_field.py +25 -0
- monitor/dataset_release_scripts/arena_33k/sample.py +32 -0
- monitor/dataset_release_scripts/arena_33k/upload_hf_dataset.py +9 -0
- monitor/dataset_release_scripts/lmsys_chat_1m/approve_all.py +13 -0
- monitor/dataset_release_scripts/lmsys_chat_1m/compute_stats.py +119 -0
- monitor/dataset_release_scripts/lmsys_chat_1m/filter_bad_conv.py +148 -0
- monitor/dataset_release_scripts/lmsys_chat_1m/final_post_processing.py +27 -0
- monitor/dataset_release_scripts/lmsys_chat_1m/instructions.md +23 -0
- monitor/dataset_release_scripts/lmsys_chat_1m/merge_oai_tag.py +45 -0
- monitor/dataset_release_scripts/lmsys_chat_1m/process_all.sh +18 -0
- monitor/dataset_release_scripts/lmsys_chat_1m/sample.py +32 -0
- monitor/dataset_release_scripts/lmsys_chat_1m/upload_hf_dataset.py +17 -0
- monitor/elo_analysis.py +303 -0
- monitor/inspect_conv.py +87 -0
- monitor/intersect_conv_file.py +25 -0
- monitor/leaderboard_csv_to_html.py +51 -0
- monitor/monitor.py +313 -0
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title:
-
-colorFrom: indigo
-colorTo: purple
+title: demo_test
+app_file: gradio_web_server.py
 sdk: gradio
-sdk_version:
-app_file: app.py
-pinned: false
+sdk_version: 3.45.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

__init__.py
ADDED
File without changes
__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (168 Bytes).

__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (184 Bytes).

__pycache__/api_provider.cpython-310.pyc
ADDED
Binary file (2.69 kB).

__pycache__/base_model_worker.cpython-310.pyc
ADDED
Binary file (7.01 kB).

__pycache__/cli.cpython-310.pyc
ADDED
Binary file (9 kB).

__pycache__/cli.cpython-311.pyc
ADDED
Binary file (15.6 kB).

__pycache__/controller.cpython-310.pyc
ADDED
Binary file (9.35 kB).

__pycache__/gradio_web_server.cpython-310.pyc
ADDED
Binary file (20.6 kB).

__pycache__/inference.cpython-310.pyc
ADDED
Binary file (11.5 kB).

__pycache__/model_worker.cpython-310.pyc
ADDED
Binary file (9.37 kB).

__pycache__/test_message.cpython-310.pyc
ADDED
Binary file (2.22 kB).
api_provider.py
ADDED
@@ -0,0 +1,130 @@
"""Call API providers."""

import os
import random
import time

from fastchat.utils import build_logger
from fastchat.constants import WORKER_API_TIMEOUT


logger = build_logger("gradio_web_server", "gradio_web_server.log")


def openai_api_stream_iter(
    model_name,
    messages,
    temperature,
    top_p,
    max_new_tokens,
    api_base=None,
    api_key=None,
):
    import openai

    openai.api_base = api_base or "https://api.openai.com/v1"
    openai.api_key = api_key or os.environ["OPENAI_API_KEY"]
    if model_name == "gpt-4-turbo":
        model_name = "gpt-4-1106-preview"

    # Make requests
    gen_params = {
        "model": model_name,
        "prompt": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_new_tokens": max_new_tokens,
    }
    logger.info(f"==== request ====\n{gen_params}")

    res = openai.ChatCompletion.create(
        model=model_name,
        messages=messages,
        temperature=temperature,
        max_tokens=max_new_tokens,
        stream=True,
    )
    text = ""
    for chunk in res:
        text += chunk["choices"][0]["delta"].get("content", "")
        data = {
            "text": text,
            "error_code": 0,
        }
        yield data


def anthropic_api_stream_iter(model_name, prompt, temperature, top_p, max_new_tokens):
    import anthropic

    c = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    # Make requests
    gen_params = {
        "model": model_name,
        "prompt": prompt,
        "temperature": temperature,
        "top_p": top_p,
        "max_new_tokens": max_new_tokens,
    }
    logger.info(f"==== request ====\n{gen_params}")

    res = c.completions.create(
        prompt=prompt,
        stop_sequences=[anthropic.HUMAN_PROMPT],
        max_tokens_to_sample=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        model=model_name,
        stream=True,
    )
    text = ""
    for chunk in res:
        text += chunk.completion
        data = {
            "text": text,
            "error_code": 0,
        }
        yield data


def init_palm_chat(model_name):
    import vertexai  # pip3 install google-cloud-aiplatform
    from vertexai.preview.language_models import ChatModel

    project_id = os.environ["GCP_PROJECT_ID"]
    location = "us-central1"
    vertexai.init(project=project_id, location=location)

    chat_model = ChatModel.from_pretrained(model_name)
    chat = chat_model.start_chat(examples=[])
    return chat


def palm_api_stream_iter(chat, message, temperature, top_p, max_new_tokens):
    parameters = {
        "temperature": temperature,
        "top_p": top_p,
        "max_output_tokens": max_new_tokens,
    }
    gen_params = {
        "model": "palm-2",
        "prompt": message,
    }
    gen_params.update(parameters)
    logger.info(f"==== request ====\n{gen_params}")

    response = chat.send_message(message, **parameters)
    content = response.text

    pos = 0
    while pos < len(content):
        # This is a fancy way to simulate token generation latency combined
        # with a Poisson process.
        pos += random.randint(10, 20)
        time.sleep(random.expovariate(50))
        data = {
            "text": content[:pos],
            "error_code": 0,
        }
        yield data

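For reference, here is a minimal sketch of how one of these streaming iterators is typically consumed. The function name comes from the file above; the import path assumes the upstream FastChat package layout, and the example message and credentials (via `OPENAI_API_KEY`) are assumptions for illustration.

```python
# Minimal consumption sketch for openai_api_stream_iter (assumes OPENAI_API_KEY is set).
from fastchat.serve.api_provider import openai_api_stream_iter

messages = [{"role": "user", "content": "Say hello in one word."}]  # hypothetical prompt
for data in openai_api_stream_iter(
    "gpt-3.5-turbo", messages, temperature=0.7, top_p=1.0, max_new_tokens=32
):
    # Each yielded dict carries the accumulated text so far plus an error code.
    print(data["error_code"], data["text"])
```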
base_model_worker.py
ADDED
@@ -0,0 +1,239 @@
import asyncio
import threading
import time
from typing import List

from fastapi import FastAPI, Request, BackgroundTasks
from fastapi.responses import StreamingResponse, JSONResponse
import requests

from fastchat.constants import WORKER_HEART_BEAT_INTERVAL
from fastchat.conversation import Conversation
from fastchat.utils import pretty_print_semaphore, build_logger


worker = None
logger = None

app = FastAPI()


def heart_beat_worker(obj):
    while True:
        time.sleep(WORKER_HEART_BEAT_INTERVAL)
        obj.send_heart_beat()


class BaseModelWorker:
    def __init__(
        self,
        controller_addr: str,
        worker_addr: str,
        worker_id: str,
        model_path: str,
        model_names: List[str],
        limit_worker_concurrency: int,
        conv_template: str = None,
    ):
        global logger, worker

        self.controller_addr = controller_addr
        self.worker_addr = worker_addr
        self.worker_id = worker_id
        if model_path.endswith("/"):
            model_path = model_path[:-1]
        self.model_names = model_names or [model_path.split("/")[-1]]
        self.limit_worker_concurrency = limit_worker_concurrency
        self.conv = self.make_conv_template(conv_template, model_path)
        self.conv.sep_style = int(self.conv.sep_style)
        self.tokenizer = None
        self.context_len = None
        self.call_ct = 0
        self.semaphore = None

        self.heart_beat_thread = None

        if logger is None:
            logger = build_logger("model_worker", f"model_worker_{self.worker_id}.log")
        if worker is None:
            worker = self

    def make_conv_template(
        self,
        conv_template: str = None,
        model_path: str = None,
    ) -> Conversation:
        """
        can be overrided to costomize the conversation template for different model workers.
        """
        from fastchat.conversation import get_conv_template
        from fastchat.model.model_adapter import get_conversation_template

        if conv_template:
            conv = get_conv_template(conv_template)
        else:
            conv = get_conversation_template(model_path)
        print(conv)
        return conv

    def init_heart_beat(self):
        self.register_to_controller()
        self.heart_beat_thread = threading.Thread(
            target=heart_beat_worker,
            args=(self,),
            daemon=True,
        )
        self.heart_beat_thread.start()

    def register_to_controller(self):
        logger.info("Register to controller")

        url = self.controller_addr + "/register_worker"
        data = {
            "worker_name": self.worker_addr,
            "check_heart_beat": True,
            "worker_status": self.get_status(),
        }
        r = requests.post(url, json=data)
        assert r.status_code == 200

    def send_heart_beat(self):
        logger.info(
            f"Send heart beat. Models: {self.model_names}. "
            f"Semaphore: {pretty_print_semaphore(self.semaphore)}. "
            f"call_ct: {self.call_ct}. "
            f"worker_id: {self.worker_id}. "
        )

        url = self.controller_addr + "/receive_heart_beat"

        while True:
            try:
                ret = requests.post(
                    url,
                    json={
                        "worker_name": self.worker_addr,
                        "queue_length": self.get_queue_length(),
                    },
                    timeout=5,
                )
                exist = ret.json()["exist"]
                break
            except (requests.exceptions.RequestException, KeyError) as e:
                logger.error(f"heart beat error: {e}")
            time.sleep(5)

        if not exist:
            self.register_to_controller()

    def get_queue_length(self):
        if (
            self.semaphore is None
            or self.semaphore._value is None
            or self.semaphore._waiters is None
        ):
            return 0
        else:
            return (
                self.limit_worker_concurrency
                - self.semaphore._value
                + len(self.semaphore._waiters)
            )

    def get_status(self):
        return {
            "model_names": self.model_names,
            "speed": 1,
            "queue_length": self.get_queue_length(),
        }

    def count_token(self, params):
        prompt = params["prompt"]

        try:
            input_ids = self.tokenizer(prompt).input_ids
            input_echo_len = len(input_ids)
        except TypeError:
            input_echo_len = self.tokenizer.num_tokens(prompt)

        ret = {
            "count": input_echo_len,
            "error_code": 0,
        }
        return ret

    def get_conv_template(self):
        return {"conv": self.conv}

    def generate_stream_gate(self, params):
        raise NotImplementedError

    def generate_gate(self, params):
        raise NotImplementedError

    def get_embeddings(self, params):
        raise NotImplementedError


def release_worker_semaphore():
    worker.semaphore.release()


def acquire_worker_semaphore():
    if worker.semaphore is None:
        worker.semaphore = asyncio.Semaphore(worker.limit_worker_concurrency)
    return worker.semaphore.acquire()


def create_background_tasks():
    background_tasks = BackgroundTasks()
    background_tasks.add_task(release_worker_semaphore)
    return background_tasks


@app.post("/worker_generate_stream")
async def api_generate_stream(request: Request):
    params = await request.json()
    await acquire_worker_semaphore()
    generator = worker.generate_stream_gate(params)
    background_tasks = create_background_tasks()
    return StreamingResponse(generator, background=background_tasks)


@app.post("/worker_generate")
async def api_generate(request: Request):
    params = await request.json()
    await acquire_worker_semaphore()
    output = await asyncio.to_thread(worker.generate_gate, params)
    release_worker_semaphore()
    return JSONResponse(output)


@app.post("/worker_get_embeddings")
async def api_get_embeddings(request: Request):
    params = await request.json()
    await acquire_worker_semaphore()
    embedding = worker.get_embeddings(params)
    release_worker_semaphore()
    return JSONResponse(content=embedding)


@app.post("/worker_get_status")
async def api_get_status(request: Request):
    return worker.get_status()


@app.post("/count_token")
async def api_count_token(request: Request):
    params = await request.json()
    return worker.count_token(params)


@app.post("/worker_get_conv_template")
async def api_get_conv(request: Request):
    return worker.get_conv_template()


@app.post("/model_details")
async def api_model_details(request: Request):
    return {"context_length": worker.context_len}

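A concrete worker subclass fills in `generate_stream_gate`, and clients then talk to the FastAPI endpoints above over HTTP. Below is a minimal sketch of calling a running worker's `/worker_generate_stream` endpoint; the worker address and the generation parameters are assumptions for illustration, and the NUL-byte chunk delimiter mirrors the relay logic in `controller.py` later in this commit.

```python
# Hypothetical client for a worker started on localhost:21002.
import json
import requests

worker_addr = "http://localhost:21002"  # assumption: where the model worker listens
params = {  # assumption: typical generation parameters for a FastChat worker
    "model": "vicuna-7b",
    "prompt": "USER: Hello ASSISTANT:",
    "temperature": 0.7,
    "max_new_tokens": 64,
}

resp = requests.post(worker_addr + "/worker_generate_stream", json=params, stream=True)
# Streamed chunks are JSON objects separated by a NUL byte.
for chunk in resp.iter_lines(decode_unicode=False, delimiter=b"\0"):
    if chunk:
        data = json.loads(chunk.decode())
        print(data["error_code"], data["text"])
```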
cli.py
ADDED
@@ -0,0 +1,313 @@
"""
Chat with a model with command line interface.

Usage:
    python3 -m fastchat.serve.cli --model lmsys/vicuna-7b-v1.5
    python3 -m fastchat.serve.cli --model lmsys/fastchat-t5-3b-v1.0

Other commands:
- Type "!!exit" or an empty line to exit.
- Type "!!reset" to start a new conversation.
- Type "!!remove" to remove the last prompt.
- Type "!!regen" to regenerate the last message.
- Type "!!save <filename>" to save the conversation history to a json file.
- Type "!!load <filename>" to load a conversation history from a json file.
"""
import argparse
import os
import re
import sys

from prompt_toolkit import PromptSession
from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
from prompt_toolkit.completion import WordCompleter
from prompt_toolkit.history import InMemoryHistory
from prompt_toolkit.key_binding import KeyBindings
from rich.console import Console
from rich.live import Live
from rich.markdown import Markdown
import torch

from fastchat.model.model_adapter import add_model_args
from fastchat.modules.awq import AWQConfig
from fastchat.modules.exllama import ExllamaConfig
from fastchat.modules.xfastertransformer import XftConfig
from fastchat.modules.gptq import GptqConfig
from fastchat.serve.inference import ChatIO, chat_loop
from fastchat.utils import str_to_torch_dtype


class SimpleChatIO(ChatIO):
    def __init__(self, multiline: bool = False, prefix: str = ''):
        self._multiline = multiline
        self.prefix = prefix

    def prompt_for_input(self, role) -> str:
        if not self._multiline:
            return input(f"{role}: {self.prefix}")

        prompt_data = []
        line = input(f"{role} [ctrl-d/z on empty line to end]: ")
        while True:
            prompt_data.append(line.strip())
            try:
                line = input()
            except EOFError as e:
                break
        return f"\n{self.prefix}".join(prompt_data)

    def prompt_for_output(self, role: str):
        print(f"{role}: ", end="", flush=True)

    def stream_output(self, output_stream):
        pre = 0
        for outputs in output_stream:
            output_text = outputs["text"]
            output_text = output_text.strip().split(" ")
            now = len(output_text) - 1
            if now > pre:
                print(" ".join(output_text[pre:now]), end=" ", flush=True)
                pre = now
        print(" ".join(output_text[pre:]), flush=True)
        return " ".join(output_text)

    def print_output(self, text: str):
        print(text)


class RichChatIO(ChatIO):
    bindings = KeyBindings()

    @bindings.add("escape", "enter")
    def _(event):
        event.app.current_buffer.newline()

    def __init__(self, multiline: bool = False, mouse: bool = False):
        self._prompt_session = PromptSession(history=InMemoryHistory())
        self._completer = WordCompleter(
            words=["!!exit", "!!reset", "!!remove", "!!regen", "!!save", "!!load"],
            pattern=re.compile("$"),
        )
        self._console = Console()
        self._multiline = multiline
        self._mouse = mouse

    def prompt_for_input(self, role) -> str:
        self._console.print(f"[bold]{role}:")
        # TODO(suquark): multiline input has some issues. fix it later.
        prompt_input = self._prompt_session.prompt(
            completer=self._completer,
            multiline=False,
            mouse_support=self._mouse,
            auto_suggest=AutoSuggestFromHistory(),
            key_bindings=self.bindings if self._multiline else None,
        )
        self._console.print()
        return prompt_input

    def prompt_for_output(self, role: str):
        self._console.print(f"[bold]{role.replace('/', '|')}:")

    def stream_output(self, output_stream):
        """Stream output from a role."""
        # TODO(suquark): the console flickers when there is a code block
        # above it. We need to cut off "live" when a code block is done.

        # Create a Live context for updating the console output
        with Live(console=self._console, refresh_per_second=4) as live:
            # Read lines from the stream
            for outputs in output_stream:
                if not outputs:
                    continue
                text = outputs["text"]
                # Render the accumulated text as Markdown
                # NOTE: this is a workaround for the rendering "unstandard markdown"
                # in rich. The chatbots output treat "\n" as a new line for
                # better compatibility with real-world text. However, rendering
                # in markdown would break the format. It is because standard markdown
                # treat a single "\n" in normal text as a space.
                # Our workaround is adding two spaces at the end of each line.
                # This is not a perfect solution, as it would
                # introduce trailing spaces (only) in code block, but it works well
                # especially for console output, because in general the console does not
                # care about trailing spaces.
                lines = []
                for line in text.splitlines():
                    lines.append(line)
                    if line.startswith("```"):
                        # Code block marker - do not add trailing spaces, as it would
                        # break the syntax highlighting
                        lines.append("\n")
                    else:
                        lines.append("  \n")
                markdown = Markdown("".join(lines))
                # Update the Live console output
                live.update(markdown)
        self._console.print()
        return text

    def print_output(self, text: str):
        self.stream_output([{"text": text}])


class ProgrammaticChatIO(ChatIO):
    def prompt_for_input(self, role) -> str:
        contents = ""
        # `end_sequence` signals the end of a message. It is unlikely to occur in
        # message content.
        end_sequence = " __END_OF_A_MESSAGE_47582648__\n"
        len_end = len(end_sequence)
        while True:
            if len(contents) >= len_end:
                last_chars = contents[-len_end:]
                if last_chars == end_sequence:
                    break
            try:
                char = sys.stdin.read(1)
                contents = contents + char
            except EOFError:
                continue
        contents = contents[:-len_end]
        print(f"[!OP:{role}]: {contents}", flush=True)
        return contents

    def prompt_for_output(self, role: str):
        print(f"[!OP:{role}]: ", end="", flush=True)

    def stream_output(self, output_stream):
        pre = 0
        for outputs in output_stream:
            output_text = outputs["text"]
            output_text = output_text.strip().split(" ")
            now = len(output_text) - 1
            if now > pre:
                print(" ".join(output_text[pre:now]), end=" ", flush=True)
                pre = now
        print(" ".join(output_text[pre:]), flush=True)
        return " ".join(output_text)

    def print_output(self, text: str):
        print(text)


def main(args):
    if args.gpus:
        if len(args.gpus.split(",")) < args.num_gpus:
            raise ValueError(
                f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
            )
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        os.environ["XPU_VISIBLE_DEVICES"] = args.gpus
    if args.enable_exllama:
        exllama_config = ExllamaConfig(
            max_seq_len=args.exllama_max_seq_len,
            gpu_split=args.exllama_gpu_split,
        )
    else:
        exllama_config = None
    if args.enable_xft:
        xft_config = XftConfig(
            max_seq_len=args.xft_max_seq_len,
            data_type=args.xft_dtype,
        )
        if args.device != "cpu":
            print("xFasterTransformer now is only support CPUs. Reset device to CPU")
            args.device = "cpu"
    else:
        xft_config = None
    if args.style == "simple":
        chatio = SimpleChatIO(args.multiline)
    elif args.style == "rich":
        chatio = RichChatIO(args.multiline, args.mouse)
    elif args.style == "programmatic":
        chatio = ProgrammaticChatIO()
    else:
        raise ValueError(f"Invalid style for console: {args.style}")
    try:
        if args.upload_file_path:
            prefix = open(args.upload_file_path, 'r').read()
            args.conv_system_msg = prefix[:20000]
        chat_loop(
            args.model_path,
            args.device,
            args.num_gpus,
            args.max_gpu_memory,
            str_to_torch_dtype(args.dtype),
            args.load_8bit,
            args.cpu_offloading,
            args.conv_template,
            args.conv_system_msg,
            args.temperature,
            args.repetition_penalty,
            args.max_new_tokens,
            chatio,
            gptq_config=GptqConfig(
                ckpt=args.gptq_ckpt or args.model_path,
                wbits=args.gptq_wbits,
                groupsize=args.gptq_groupsize,
                act_order=args.gptq_act_order,
            ),
            awq_config=AWQConfig(
                ckpt=args.awq_ckpt or args.model_path,
                wbits=args.awq_wbits,
                groupsize=args.awq_groupsize,
            ),
            exllama_config=exllama_config,
            xft_config=xft_config,
            revision=args.revision,
            judge_sent_end=args.judge_sent_end,
            debug=args.debug,
            history=not args.no_history,
        )
    except KeyboardInterrupt:
        print("exit...")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    add_model_args(parser)
    parser.add_argument(
        "--conv-template", type=str, default=None, help="Conversation prompt template."
    )
    parser.add_argument(
        "--conv-system-msg", type=str, default=None, help="Conversation system message."
    )
    parser.add_argument("--temperature", type=float, default=0.7)
    parser.add_argument("--repetition_penalty", type=float, default=1.0)
    parser.add_argument("--max-new-tokens", type=int, default=512)
    parser.add_argument("--no-history", action="store_true")
    parser.add_argument(
        "--style",
        type=str,
        default="simple",
        choices=["simple", "rich", "programmatic"],
        help="Display style.",
    )
    parser.add_argument(
        "--multiline",
        action="store_true",
        help="Enable multiline input. Use ESC+Enter for newline.",
    )
    parser.add_argument(
        "--mouse",
        action="store_true",
        help="[Rich Style]: Enable mouse support for cursor positioning.",
    )
    parser.add_argument(
        "--judge-sent-end",
        action="store_true",
        help="Whether enable the correction logic that interrupts the output of sentences due to EOS.",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Print useful debug information (e.g., prompts)",
    )
    parser.add_argument(
        "--upload-file-path",
        type=str,
        default="",
        help="upload long txt for summary.",
    )
    args = parser.parse_args()
    main(args)

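The `--style programmatic` mode reads a message from stdin until the end-of-message marker defined in `ProgrammaticChatIO` above. A minimal driver sketch follows; the model argument mirrors the usage string in the file's docstring, and the exact flags and model path are assumptions for illustration.

```python
# Hypothetical driver for cli.py in programmatic mode.
import subprocess

END = " __END_OF_A_MESSAGE_47582648__\n"  # marker taken from ProgrammaticChatIO above

proc = subprocess.Popen(
    ["python3", "-m", "fastchat.serve.cli",
     "--model", "lmsys/vicuna-7b-v1.5", "--style", "programmatic"],
    stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True,
)
# Send one user turn; the CLI echoes it back with an "[!OP:...]" prefix and then streams the reply.
proc.stdin.write("Tell me a joke." + END)
proc.stdin.flush()
```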
controller.py
ADDED
@@ -0,0 +1,348 @@
"""
A controller manages distributed workers.
It sends worker addresses to clients.
"""
import argparse
import asyncio
import dataclasses
from enum import Enum, auto
import json
import logging
import os
import time
from typing import List, Union
import threading

from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
import numpy as np
import requests
import uvicorn

from fastchat.constants import (
    CONTROLLER_HEART_BEAT_EXPIRATION,
    WORKER_API_TIMEOUT,
    ErrorCode,
    SERVER_ERROR_MSG,
)
from fastchat.utils import build_logger


logger = build_logger("controller", "controller.log")


class DispatchMethod(Enum):
    LOTTERY = auto()
    SHORTEST_QUEUE = auto()

    @classmethod
    def from_str(cls, name):
        if name == "lottery":
            return cls.LOTTERY
        elif name == "shortest_queue":
            return cls.SHORTEST_QUEUE
        else:
            raise ValueError(f"Invalid dispatch method")


@dataclasses.dataclass
class WorkerInfo:
    model_names: List[str]
    speed: int
    queue_length: int
    check_heart_beat: bool
    last_heart_beat: str


def heart_beat_controller(controller):
    while True:
        time.sleep(CONTROLLER_HEART_BEAT_EXPIRATION)
        controller.remove_stale_workers_by_expiration()


class Controller:
    def __init__(self, dispatch_method: str):
        # Dict[str -> WorkerInfo]
        self.worker_info = {}
        self.dispatch_method = DispatchMethod.from_str(dispatch_method)

        self.heart_beat_thread = threading.Thread(
            target=heart_beat_controller, args=(self,)
        )
        self.heart_beat_thread.start()

    def register_worker(
        self, worker_name: str, check_heart_beat: bool, worker_status: dict
    ):
        if worker_name not in self.worker_info:
            logger.info(f"Register a new worker: {worker_name}")
        else:
            logger.info(f"Register an existing worker: {worker_name}")

        if not worker_status:
            worker_status = self.get_worker_status(worker_name)
        if not worker_status:
            return False

        self.worker_info[worker_name] = WorkerInfo(
            worker_status["model_names"],
            worker_status["speed"],
            worker_status["queue_length"],
            check_heart_beat,
            time.time(),
        )

        logger.info(f"Register done: {worker_name}, {worker_status}")
        return True

    def get_worker_status(self, worker_name: str):
        try:
            r = requests.post(worker_name + "/worker_get_status", timeout=5)
        except requests.exceptions.RequestException as e:
            logger.error(f"Get status fails: {worker_name}, {e}")
            return None

        if r.status_code != 200:
            logger.error(f"Get status fails: {worker_name}, {r}")
            return None

        return r.json()

    def remove_worker(self, worker_name: str):
        del self.worker_info[worker_name]

    def refresh_all_workers(self):
        old_info = dict(self.worker_info)
        self.worker_info = {}

        for w_name, w_info in old_info.items():
            if not self.register_worker(w_name, w_info.check_heart_beat, None):
                logger.info(f"Remove stale worker: {w_name}")

    def list_models(self):
        model_names = set()

        for w_name, w_info in self.worker_info.items():
            model_names.update(w_info.model_names)

        return list(model_names)

    def get_worker_address(self, model_name: str):
        if self.dispatch_method == DispatchMethod.LOTTERY:
            worker_names = []
            worker_speeds = []
            for w_name, w_info in self.worker_info.items():
                if model_name in w_info.model_names:
                    worker_names.append(w_name)
                    worker_speeds.append(w_info.speed)
            worker_speeds = np.array(worker_speeds, dtype=np.float32)
            norm = np.sum(worker_speeds)
            if norm < 1e-4:
                return ""
            worker_speeds = worker_speeds / norm
            if True:  # Directly return address
                pt = np.random.choice(np.arange(len(worker_names)), p=worker_speeds)
                worker_name = worker_names[pt]
                return worker_name

            # Check status before returning
            while True:
                pt = np.random.choice(np.arange(len(worker_names)), p=worker_speeds)
                worker_name = worker_names[pt]

                if self.get_worker_status(worker_name):
                    break
                else:
                    self.remove_worker(worker_name)
                    worker_speeds[pt] = 0
                    norm = np.sum(worker_speeds)
                    if norm < 1e-4:
                        return ""
                    worker_speeds = worker_speeds / norm
                    continue
            return worker_name
        elif self.dispatch_method == DispatchMethod.SHORTEST_QUEUE:
            worker_names = []
            worker_qlen = []
            for w_name, w_info in self.worker_info.items():
                if model_name in w_info.model_names:
                    worker_names.append(w_name)
                    worker_qlen.append(w_info.queue_length / w_info.speed)
            if len(worker_names) == 0:
                return ""
            min_index = np.argmin(worker_qlen)
            w_name = worker_names[min_index]
            self.worker_info[w_name].queue_length += 1
            logger.info(
                f"names: {worker_names}, queue_lens: {worker_qlen}, ret: {w_name}"
            )
            return w_name
        else:
            raise ValueError(f"Invalid dispatch method: {self.dispatch_method}")

    def receive_heart_beat(self, worker_name: str, queue_length: int):
        if worker_name not in self.worker_info:
            logger.info(f"Receive unknown heart beat. {worker_name}")
            return False

        self.worker_info[worker_name].queue_length = queue_length
        self.worker_info[worker_name].last_heart_beat = time.time()
        logger.info(f"Receive heart beat. {worker_name}")
        return True

    def remove_stale_workers_by_expiration(self):
        expire = time.time() - CONTROLLER_HEART_BEAT_EXPIRATION
        to_delete = []
        for worker_name, w_info in self.worker_info.items():
            if w_info.check_heart_beat and w_info.last_heart_beat < expire:
                to_delete.append(worker_name)

        for worker_name in to_delete:
            self.remove_worker(worker_name)

    def handle_no_worker(self, params):
        logger.info(f"no worker: {params['model']}")
        ret = {
            "text": SERVER_ERROR_MSG,
            "error_code": ErrorCode.CONTROLLER_NO_WORKER,
        }
        return json.dumps(ret).encode() + b"\0"

    def handle_worker_timeout(self, worker_address):
        logger.info(f"worker timeout: {worker_address}")
        ret = {
            "text": SERVER_ERROR_MSG,
            "error_code": ErrorCode.CONTROLLER_WORKER_TIMEOUT,
        }
        return json.dumps(ret).encode() + b"\0"

    # Let the controller act as a worker to achieve hierarchical
    # management. This can be used to connect isolated sub networks.
    def worker_api_get_status(self):
        model_names = set()
        speed = 0
        queue_length = 0

        for w_name in self.worker_info:
            worker_status = self.get_worker_status(w_name)
            if worker_status is not None:
                model_names.update(worker_status["model_names"])
                speed += worker_status["speed"]
                queue_length += worker_status["queue_length"]

        model_names = sorted(list(model_names))
        return {
            "model_names": model_names,
            "speed": speed,
            "queue_length": queue_length,
        }

    def worker_api_generate_stream(self, params):
        worker_addr = self.get_worker_address(params["model"])
        if not worker_addr:
            yield self.handle_no_worker(params)

        try:
            response = requests.post(
                worker_addr + "/worker_generate_stream",
                json=params,
                stream=True,
                timeout=WORKER_API_TIMEOUT,
            )
            for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
                if chunk:
                    yield chunk + b"\0"
        except requests.exceptions.RequestException as e:
            yield self.handle_worker_timeout(worker_addr)


app = FastAPI()


@app.post("/register_worker")
async def register_worker(request: Request):
    data = await request.json()
    controller.register_worker(
        data["worker_name"], data["check_heart_beat"], data.get("worker_status", None)
    )


@app.post("/refresh_all_workers")
async def refresh_all_workers():
    models = controller.refresh_all_workers()


@app.post("/list_models")
async def list_models():
    models = controller.list_models()
    return {"models": models}


@app.post("/get_worker_address")
async def get_worker_address(request: Request):
    data = await request.json()
    addr = controller.get_worker_address(data["model"])
    return {"address": addr}


@app.post("/receive_heart_beat")
async def receive_heart_beat(request: Request):
    data = await request.json()
    exist = controller.receive_heart_beat(data["worker_name"], data["queue_length"])
    return {"exist": exist}


@app.post("/worker_generate_stream")
async def worker_api_generate_stream(request: Request):
    params = await request.json()
    generator = controller.worker_api_generate_stream(params)
    return StreamingResponse(generator)


@app.post("/worker_get_status")
async def worker_api_get_status(request: Request):
    return controller.worker_api_get_status()


@app.get("/test_connection")
async def worker_api_get_status(request: Request):
    return "success"


def create_controller():
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="localhost")
    parser.add_argument("--port", type=int, default=21001)
    parser.add_argument(
        "--dispatch-method",
        type=str,
        choices=["lottery", "shortest_queue"],
        default="shortest_queue",
    )
    parser.add_argument(
        "--ssl",
        action="store_true",
        required=False,
        default=False,
        help="Enable SSL. Requires OS Environment variables 'SSL_KEYFILE' and 'SSL_CERTFILE'.",
    )
    args = parser.parse_args()
    logger.info(f"args: {args}")

    controller = Controller(args.dispatch_method)
    return args, controller


if __name__ == "__main__":
    args, controller = create_controller()
    if args.ssl:
        uvicorn.run(
            app,
            host=args.host,
            port=args.port,
            log_level="info",
            ssl_keyfile=os.environ["SSL_KEYFILE"],
            ssl_certfile=os.environ["SSL_CERTFILE"],
        )
    else:
        uvicorn.run(app, host=args.host, port=args.port, log_level="info")

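The endpoint names below come straight from the controller code above; here is a minimal sketch of querying a controller running on its default port. The model name in the last call is an assumption for illustration.

```python
# Hypothetical client for the controller above (default host/port: localhost:21001).
import requests

controller_addr = "http://localhost:21001"

# Re-poll registered workers, then list every model they currently serve.
requests.post(controller_addr + "/refresh_all_workers")
models = requests.post(controller_addr + "/list_models").json()["models"]
print(models)

# Ask which worker should handle a given model under the configured dispatch method.
addr = requests.post(
    controller_addr + "/get_worker_address", json={"model": "vicuna-7b"}
).json()["address"]
print(addr or "no worker available")
```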
gateway/README.md
ADDED
@@ -0,0 +1,57 @@
# fastchat Nginx Gateway

## Purpose of the Gateway

The Nginx gateway serves the following purposes:

1. Protects Gradio servers by acting as a firewall.
2. Facilitates dynamic mounting and unmounting of Gradio servers.
3. Provides load balancing for Gradio servers.
4. Offers additional security features, such as total connection limit.
5. Reduces attack surface by requiring only a single public port to be exposed for serving.

## Deployment and Updating of the Gateway

### Installing Nginx

On Debian-based distributions (e.g., Ubuntu):

```bash
sudo apt update
sudo apt install nginx
```

On Red Hat-based distributions (e.g., CentOS, Fedora):

```bash
sudo yum install epel-release
sudo yum install nginx
```

### Deployment

Copy `nginx.conf` to `/etc/nginx/nginx.conf` (need sudo permission).

Replace the port number 7860 in `server localhost:7860` with the port where you deploy the Gradio web server.

Modify `upstream websocket` to configure Gradio servers behind the gateway.

Lastly, update Nginx.

### HTTPS Deployment with a Public Domain URL

Make sure you obtain the HTTPS certificate and the private key used to generate the certificate.

Fill the addresses to your certificate and private key in the `[PATH_TO_SSL_CERT]` and `[PATH_TO_PRIVATE_KEY]` fields.

If you have your own domain url to serve the chatbot, replace the chat.lmsys.org url with your own domain url.

### Updating

Every time when `/etc/nginx/nginx.conf` is modified, you need to update the Nginx service:

```bash
sudo nginx -t  # check `/etc/nginx/nginx.conf`
sudo systemctl reload nginx  # restart Nginx service to load the new config
sudo systemctl status nginx  # check the status of the Nginx service. It should be active (running).
```

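Once the gateway is reloaded, a quick end-to-end check is to request the public URL and confirm the proxy answers. A minimal sketch (the domain is the placeholder used in `nginx.conf`; substitute your own, and note the `Server` header is only present when tokens are not disabled):

```python
# Hypothetical smoke test for the HTTPS gateway in front of the Gradio web server.
import requests

resp = requests.get("https://chat.lmsys.org/", timeout=10)
print(resp.status_code)                  # expect 200 once the upstream Gradio server is up
print(resp.headers.get("server", ""))    # typically reports nginx when the gateway answers
```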
gateway/nginx.conf
ADDED
@@ -0,0 +1,97 @@
user www-data;
worker_processes auto;
pid /run/nginx.pid;
include /etc/nginx/modules-enabled/*.conf;

events {
    worker_connections 1024;  # maximum number of connections that a worker process can handle concurrently
    # multi_accept on;  # enabling multi_accept can help improve performance under high load, but may increase the number of simultaneous connections that a worker process can handle
}

http {
    ##
    # Basic Settings
    ##

    sendfile on;  # enable sendfile for performance optimization
    tcp_nopush on;  # enable TCP no-pushing
    tcp_nodelay on;  # enable TCP no-delay
    keepalive_timeout 65;  # sets the timeout for keep-alive connections
    types_hash_max_size 2048;  # maximum size of the types hash table
    # server_tokens off;  # disable server token (i.e., server signature) in response headers to improve security

    # server_names_hash_bucket_size 64;
    # server_name_in_redirect off;

    include /etc/nginx/mime.types;  # include MIME types file
    default_type application/octet-stream;  # default MIME type for unknown file types

    ##
    # SSL Settings
    ##

    ssl_protocols TLSv1.2;  # specify SSL/TLS protocols to use
    ssl_prefer_server_ciphers on;  # prefer server ciphers over client ciphers

    ##
    # Logging Settings
    ##

    access_log /var/log/nginx/access.log;  # path to access log file
    error_log /var/log/nginx/error.log;  # path to error log file

    ##
    # Gzip Settings
    ##
    gzip on;  # enable Gzip compression

    ##
    # Virtual Host Configs
    ##

    include /etc/nginx/conf.d/*.conf;  # include all configuration files in conf.d directory
    include /etc/nginx/sites-enabled/*;  # include all enabled sites configuration files

    # WebSocket Proxy: https://www.nginx.com/blog/websocket-nginx/
    map $http_upgrade $connection_upgrade {
        default upgrade;
        '' close;
    }

    upstream websocket {
        ip_hash;  # load balancing by IP to guarantee session persistence
        server localhost:7860;  # The port should be the gradio web server port
        # server localhost:7861;  # extra gradio server if more than one
    }

    limit_conn_status 429;
    limit_conn_zone $binary_remote_addr zone=perip:10m;  # limit number of connections per IP
    limit_conn_zone $server_name zone=perserver:10m;  # limit number of connections per server

    server {
        listen 443 ssl;  # the listening port of our server
        ssl_certificate [PATH_TO_SSL_CERT];
        ssl_certificate_key [PATH_TO_PRIVATE_KEY];
        server_name chat.lmsys.org;  # replace the url with your own domain url
        limit_conn perserver 1024;  # connections per server
        location / {
            proxy_pass http://websocket;  # proxy all requests to the defined upstream server
            limit_conn perip 5;  # connections per IP
            proxy_set_header Host $host;  # set the Host header for the upstream server
            proxy_set_header X-Real-IP $remote_addr;  # set the client IP address as the real IP for the upstream server
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;  # set the client IP addresses in the X-Forwarded-For header
            proxy_http_version 1.1;  # use HTTP version 1.1 for upstream communication
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "Upgrade";  # set the Connection header to Upgrade to enable WebSocket communication
        }
    }

    # the following block routes all HTTP traffic to HTTPS via nginx
    server {
        listen 80;
        server_name chat.lmsys.org;
        return 301 https://chat.lmsys.org$request_uri;
    }

}

gradio_block_arena_anony.py
ADDED
@@ -0,0 +1,608 @@
"""
Chatbot Arena (battle) tab.
Users chat with two anonymous models.
"""

import json
import time

import gradio as gr
import numpy as np

from fastchat.constants import (
    MODERATION_MSG,
    CONVERSATION_LIMIT_MSG,
    SLOW_MODEL_MSG,
    INPUT_CHAR_LEN_LIMIT,
    CONVERSATION_TURN_LIMIT,
)
from fastchat.model.model_adapter import get_conversation_template
from fastchat.serve.gradio_block_arena_named import flash_buttons
from fastchat.serve.gradio_web_server import (
    State,
    bot_response,
    get_conv_log_filename,
    no_change_btn,
    enable_btn,
    disable_btn,
    invisible_btn,
    acknowledgment_md,
    ip_expiration_dict,
    get_ip,
)
from fastchat.utils import (
    build_logger,
    moderation_filter,
)

logger = build_logger("gradio_web_server_multi", "gradio_web_server_multi.log")

num_sides = 2
enable_moderation = False
anony_names = ["", ""]
models = []


def set_global_vars_anony(enable_moderation_):
    global enable_moderation
    enable_moderation = enable_moderation_


def load_demo_side_by_side_anony(models_, url_params):
    global models
    models = models_

    states = (None,) * num_sides
    selector_updates = (
        gr.Markdown.update(visible=True),
        gr.Markdown.update(visible=True),
    )

    return states + selector_updates


def vote_last_response(states, vote_type, model_selectors, request: gr.Request):
    with open(get_conv_log_filename(), "a") as fout:
        data = {
            "tstamp": round(time.time(), 4),
            "type": vote_type,
            "models": [x for x in model_selectors],
            "states": [x.dict() for x in states],
            "ip": get_ip(request),
        }
        fout.write(json.dumps(data) + "\n")

    if ":" not in model_selectors[0]:
        for i in range(15):
            names = (
                "### Model A: " + states[0].model_name,
                "### Model B: " + states[1].model_name,
            )
            yield names + ("",) + (disable_btn,) * 4
            time.sleep(0.2)
    else:
        names = (
            "### Model A: " + states[0].model_name,
            "### Model B: " + states[1].model_name,
        )
        yield names + ("",) + (disable_btn,) * 4


def leftvote_last_response(
    state0, state1, model_selector0, model_selector1, request: gr.Request
):
    logger.info(f"leftvote (anony). ip: {get_ip(request)}")
    for x in vote_last_response(
        [state0, state1], "leftvote", [model_selector0, model_selector1], request
    ):
        yield x


def rightvote_last_response(
    state0, state1, model_selector0, model_selector1, request: gr.Request
):
    logger.info(f"rightvote (anony). ip: {get_ip(request)}")
    for x in vote_last_response(
        [state0, state1], "rightvote", [model_selector0, model_selector1], request
    ):
        yield x


def tievote_last_response(
    state0, state1, model_selector0, model_selector1, request: gr.Request
):
    logger.info(f"tievote (anony). ip: {get_ip(request)}")
    for x in vote_last_response(
        [state0, state1], "tievote", [model_selector0, model_selector1], request
    ):
        yield x


def bothbad_vote_last_response(
    state0, state1, model_selector0, model_selector1, request: gr.Request
):
    logger.info(f"bothbad_vote (anony). ip: {get_ip(request)}")
    for x in vote_last_response(
        [state0, state1], "bothbad_vote", [model_selector0, model_selector1], request
    ):
        yield x


def regenerate(state0, state1, request: gr.Request):
    logger.info(f"regenerate (anony). ip: {get_ip(request)}")
    states = [state0, state1]
    for i in range(num_sides):
        states[i].conv.update_last_message(None)
    return states + [x.to_gradio_chatbot() for x in states] + [""] + [disable_btn] * 6


def clear_history(request: gr.Request):
    logger.info(f"clear_history (anony). ip: {get_ip(request)}")
    return (
        [None] * num_sides
        + [None] * num_sides
        + anony_names
        + [""]
        + [invisible_btn] * 4
        + [disable_btn] * 2
        + [""]
    )


def share_click(state0, state1, model_selector0, model_selector1, request: gr.Request):
    logger.info(f"share (anony). ip: {get_ip(request)}")
    if state0 is not None and state1 is not None:
        vote_last_response(
            [state0, state1], "share", [model_selector0, model_selector1], request
        )


SAMPLING_WEIGHTS = {
    # tier 0
    "gpt-4": 4,
    "gpt-4-turbo": 4,
    "gpt-3.5-turbo": 2,
    "gpt-3.5-turbo-1106": 2,
    "claude-2": 8,
    "claude-1": 2,
    "claude-instant-1": 8,
    "zephyr-7b-beta": 2,
    "openchat-3.5": 2,
    # tier 1
    "deluxe-chat-v1.1": 2,
    "palm-2": 1.5,
    "llama-2-70b-chat": 1.5,
    "llama-2-13b-chat": 1.5,
    "codellama-34b-instruct": 1.5,
    "vicuna-33b": 8,
    "vicuna-13b": 1.5,
    "wizardlm-70b": 1.5,
    "wizardlm-13b": 1.5,
    "qwen-14b-chat": 1.5,
    "mistral-7b-instruct": 1.5,
    # tier 2
    "vicuna-7b": 1.0,
    "llama-2-7b-chat": 1.0,
    "chatglm2-6b": 1.0,
    # deprecated
    "zephyr-7b-alpha": 1.5,
    "codellama-13b-instruct": 1.0,
    "mpt-30b-chat": 1.5,
    "guanaco-33b": 1.0,
    "fastchat-t5-3b": 0.5,
    "alpaca-13b": 0.5,
    "mpt-7b-chat": 0.1,
    "oasst-pythia-12b": 0.1,
    "RWKV-4-Raven-14B": 0.1,
    "gpt4all-13b-snoozy": 0.1,
    "koala-13b": 0.1,
    "stablelm-tuned-alpha-7b": 0.1,
    "dolly-v2-12b": 0.1,
    "llama-13b": 0.1,
    "chatglm-6b": 0.5,
    "deluxe-chat-v1": 4,
}

# target model sampling weights will be boosted.
BATTLE_TARGETS = {
    "gpt-4": {"claude-2"},
    "gpt-4-turbo": {"gpt-4", "gpt-3.5-turbo"},
    "gpt-3.5-turbo": {"claude-instant-1", "gpt-4", "claude-2"},
    "claude-2": {"gpt-4", "gpt-3.5-turbo", "claude-1"},
    "claude-1": {"claude-2", "gpt-4", "gpt-3.5-turbo"},
    "claude-instant-1": {"gpt-3.5-turbo", "claude-2"},
    "deluxe-chat-v1.1": {"gpt-4"},
    "openchat-3.5": {"gpt-3.5-turbo", "llama-2-70b-chat", "zephyr-7b-beta"},
    "qwen-14b-chat": {"vicuna-13b", "llama-2-13b-chat", "llama-2-70b-chat"},
    "zephyr-7b-alpha": {"mistral-7b-instruct", "llama-2-13b-chat"},
"zephyr-7b-alpha": {"mistral-7b-instruct", "llama-2-13b-chat"},
|
218 |
+
"zephyr-7b-beta": {
|
219 |
+
"mistral-7b-instruct",
|
220 |
+
"llama-2-13b-chat",
|
221 |
+
"llama-2-7b-chat",
|
222 |
+
"wizardlm-13b",
|
223 |
+
},
|
224 |
+
"llama-2-70b-chat": {"gpt-3.5-turbo", "vicuna-33b", "claude-instant-1"},
|
225 |
+
"llama-2-13b-chat": {"mistral-7b-instruct", "vicuna-13b", "llama-2-70b-chat"},
|
226 |
+
"llama-2-7b-chat": {"mistral-7b-instruct", "vicuna-7b", "llama-2-13b-chat"},
|
227 |
+
"mistral-7b-instruct": {
|
228 |
+
"llama-2-7b-chat",
|
229 |
+
"llama-2-13b-chat",
|
230 |
+
"llama-2-70b-chat",
|
231 |
+
},
|
232 |
+
"vicuna-33b": {"llama-2-70b-chat", "gpt-3.5-turbo", "claude-instant-1"},
|
233 |
+
"vicuna-13b": {"llama-2-13b-chat", "llama-2-70b-chat"},
|
234 |
+
"vicuna-7b": {"llama-2-7b-chat", "mistral-7b-instruct", "llama-2-13b-chat"},
|
235 |
+
"wizardlm-70b": {"gpt-3.5-turbo", "vicuna-33b", "claude-instant-1"},
|
236 |
+
"palm-2": {"llama-2-13b-chat", "gpt-3.5-turbo"},
|
237 |
+
}
|
238 |
+
|
239 |
+
SAMPLING_BOOST_MODELS = ["openchat-3.5", "gpt-4-turbo", "gpt-3.5-turbo-1106"]
|
240 |
+
|
241 |
+
# outage models won't be sampled.
|
242 |
+
OUTAGE_MODELS = []
|
243 |
+
|
244 |
+
|
245 |
+
def get_sample_weight(model):
|
246 |
+
if model in OUTAGE_MODELS:
|
247 |
+
return 0
|
248 |
+
weight = SAMPLING_WEIGHTS.get(model, 1.0)
|
249 |
+
if model in SAMPLING_BOOST_MODELS:
|
250 |
+
weight *= 5
|
251 |
+
return weight
|
252 |
+
|
253 |
+
|
254 |
+
def get_battle_pair():
|
255 |
+
if len(models) == 1:
|
256 |
+
return models[0], models[0]
|
257 |
+
|
258 |
+
model_weights = []
|
259 |
+
for model in models:
|
260 |
+
weight = get_sample_weight(model)
|
261 |
+
model_weights.append(weight)
|
262 |
+
total_weight = np.sum(model_weights)
|
263 |
+
model_weights = model_weights / total_weight
|
264 |
+
chosen_idx = np.random.choice(len(models), p=model_weights)
|
265 |
+
chosen_model = models[chosen_idx]
|
266 |
+
|
267 |
+
rival_models = []
|
268 |
+
rival_weights = []
|
269 |
+
for model in models:
|
270 |
+
if model == chosen_model:
|
271 |
+
continue
|
272 |
+
weight = get_sample_weight(model)
|
273 |
+
if (
|
274 |
+
weight != 0
|
275 |
+
and chosen_model in BATTLE_TARGETS
|
276 |
+
and model in BATTLE_TARGETS[chosen_model]
|
277 |
+
):
|
278 |
+
# boost to 50% chance
|
279 |
+
weight = total_weight / len(BATTLE_TARGETS[chosen_model])
|
280 |
+
rival_models.append(model)
|
281 |
+
rival_weights.append(weight)
|
282 |
+
# for p, w in zip(rival_models, rival_weights):
|
283 |
+
# print(p, w)
|
284 |
+
rival_weights = rival_weights / np.sum(rival_weights)
|
285 |
+
rival_idx = np.random.choice(len(rival_models), p=rival_weights)
|
286 |
+
rival_model = rival_models[rival_idx]
|
287 |
+
|
288 |
+
swap = np.random.randint(2)
|
289 |
+
if swap == 0:
|
290 |
+
return chosen_model, rival_model
|
291 |
+
else:
|
292 |
+
return rival_model, chosen_model
|
293 |
+
|
294 |
+
|
295 |
+
def add_text(
|
296 |
+
state0, state1, model_selector0, model_selector1, text, request: gr.Request
|
297 |
+
):
|
298 |
+
ip = get_ip(request)
|
299 |
+
logger.info(f"add_text (anony). ip: {ip}. len: {len(text)}")
|
300 |
+
states = [state0, state1]
|
301 |
+
model_selectors = [model_selector0, model_selector1]
|
302 |
+
|
303 |
+
# Init states if necessary
|
304 |
+
if states[0] is None:
|
305 |
+
assert states[1] is None
|
306 |
+
|
307 |
+
model_left, model_right = get_battle_pair()
|
308 |
+
states = [
|
309 |
+
State(model_left),
|
310 |
+
State(model_right),
|
311 |
+
]
|
312 |
+
|
313 |
+
if len(text) <= 0:
|
314 |
+
for i in range(num_sides):
|
315 |
+
states[i].skip_next = True
|
316 |
+
return (
|
317 |
+
states
|
318 |
+
+ [x.to_gradio_chatbot() for x in states]
|
319 |
+
+ [""]
|
320 |
+
+ [
|
321 |
+
no_change_btn,
|
322 |
+
]
|
323 |
+
* 6
|
324 |
+
+ [""]
|
325 |
+
)
|
326 |
+
|
327 |
+
model_list = [states[i].model_name for i in range(num_sides)]
|
328 |
+
flagged = moderation_filter(text, model_list)
|
329 |
+
if flagged:
|
330 |
+
logger.info(f"violate moderation (anony). ip: {ip}. text: {text}")
|
331 |
+
# overwrite the original text
|
332 |
+
text = MODERATION_MSG
|
333 |
+
|
334 |
+
conv = states[0].conv
|
335 |
+
if (len(conv.messages) - conv.offset) // 2 >= CONVERSATION_TURN_LIMIT:
|
336 |
+
logger.info(f"conversation turn limit. ip: {get_ip(request)}. text: {text}")
|
337 |
+
for i in range(num_sides):
|
338 |
+
states[i].skip_next = True
|
339 |
+
return (
|
340 |
+
states
|
341 |
+
+ [x.to_gradio_chatbot() for x in states]
|
342 |
+
+ [CONVERSATION_LIMIT_MSG]
|
343 |
+
+ [
|
344 |
+
no_change_btn,
|
345 |
+
]
|
346 |
+
* 6
|
347 |
+
+ [""]
|
348 |
+
)
|
349 |
+
|
350 |
+
text = text[:INPUT_CHAR_LEN_LIMIT] # Hard cut-off
|
351 |
+
for i in range(num_sides):
|
352 |
+
states[i].conv.append_message(states[i].conv.roles[0], text)
|
353 |
+
states[i].conv.append_message(states[i].conv.roles[1], None)
|
354 |
+
states[i].skip_next = False
|
355 |
+
|
356 |
+
slow_model_msg = ""
|
357 |
+
for i in range(num_sides):
|
358 |
+
if "deluxe" in states[i].model_name:
|
359 |
+
slow_model_msg = SLOW_MODEL_MSG
|
360 |
+
return (
|
361 |
+
states
|
362 |
+
+ [x.to_gradio_chatbot() for x in states]
|
363 |
+
+ [""]
|
364 |
+
+ [
|
365 |
+
disable_btn,
|
366 |
+
]
|
367 |
+
* 6
|
368 |
+
+ [slow_model_msg]
|
369 |
+
)
|
370 |
+
|
371 |
+
|
372 |
+
def bot_response_multi(
|
373 |
+
state0,
|
374 |
+
state1,
|
375 |
+
temperature,
|
376 |
+
top_p,
|
377 |
+
max_new_tokens,
|
378 |
+
request: gr.Request,
|
379 |
+
):
|
380 |
+
logger.info(f"bot_response_multi (anony). ip: {get_ip(request)}")
|
381 |
+
|
382 |
+
if state0 is None or state0.skip_next:
|
383 |
+
# This generate call is skipped due to invalid inputs
|
384 |
+
yield (
|
385 |
+
state0,
|
386 |
+
state1,
|
387 |
+
state0.to_gradio_chatbot(),
|
388 |
+
state1.to_gradio_chatbot(),
|
389 |
+
) + (no_change_btn,) * 6
|
390 |
+
return
|
391 |
+
|
392 |
+
states = [state0, state1]
|
393 |
+
gen = []
|
394 |
+
for i in range(num_sides):
|
395 |
+
gen.append(
|
396 |
+
bot_response(
|
397 |
+
states[i],
|
398 |
+
temperature,
|
399 |
+
top_p,
|
400 |
+
max_new_tokens,
|
401 |
+
request,
|
402 |
+
)
|
403 |
+
)
|
404 |
+
|
405 |
+
chatbots = [None] * num_sides
|
406 |
+
while True:
|
407 |
+
stop = True
|
408 |
+
for i in range(num_sides):
|
409 |
+
try:
|
410 |
+
ret = next(gen[i])
|
411 |
+
states[i], chatbots[i] = ret[0], ret[1]
|
412 |
+
stop = False
|
413 |
+
except StopIteration:
|
414 |
+
pass
|
415 |
+
yield states + chatbots + [disable_btn] * 6
|
416 |
+
if stop:
|
417 |
+
break
|
418 |
+
|
419 |
+
|
420 |
+
def build_side_by_side_ui_anony(models):
|
421 |
+
notice_markdown = """
|
422 |
+
# ⚔️ Chatbot Arena ⚔️ : Benchmarking LLMs in the Wild
|
423 |
+
| [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
|
424 |
+
|
425 |
+
## 📜 Rules
|
426 |
+
- Ask any question to two anonymous models (e.g., ChatGPT, Claude, Llama) and vote for the better one!
|
427 |
+
- You can continue chatting until you identify a winner.
|
428 |
+
- Vote won't be counted if model identity is revealed during conversation.
|
429 |
+
|
430 |
+
## 🏆 Arena Elo [Leaderboard](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard)
|
431 |
+
We use **100K** human votes to compile an Elo-based LLM leaderboard.
|
432 |
+
Find out who is the 🥇LLM Champion!
|
433 |
+
|
434 |
+
## 👇 Chat now!
|
435 |
+
|
436 |
+
"""
|
437 |
+
|
438 |
+
states = [gr.State() for _ in range(num_sides)]
|
439 |
+
model_selectors = [None] * num_sides
|
440 |
+
chatbots = [None] * num_sides
|
441 |
+
|
442 |
+
gr.Markdown(notice_markdown, elem_id="notice_markdown")
|
443 |
+
|
444 |
+
with gr.Box(elem_id="share-region-anony"):
|
445 |
+
with gr.Row():
|
446 |
+
for i in range(num_sides):
|
447 |
+
label = "Model A" if i == 0 else "Model B"
|
448 |
+
with gr.Column():
|
449 |
+
chatbots[i] = gr.Chatbot(
|
450 |
+
label=label, elem_id=f"chatbot", height=550
|
451 |
+
)
|
452 |
+
|
453 |
+
with gr.Row():
|
454 |
+
for i in range(num_sides):
|
455 |
+
with gr.Column():
|
456 |
+
model_selectors[i] = gr.Markdown(anony_names[i])
|
457 |
+
with gr.Row():
|
458 |
+
slow_warning = gr.Markdown("", elem_id="notice_markdown")
|
459 |
+
|
460 |
+
with gr.Row():
|
461 |
+
leftvote_btn = gr.Button(
|
462 |
+
value="👈 A is better", visible=False, interactive=False
|
463 |
+
)
|
464 |
+
rightvote_btn = gr.Button(
|
465 |
+
value="👉 B is better", visible=False, interactive=False
|
466 |
+
)
|
467 |
+
tie_btn = gr.Button(value="🤝 Tie", visible=False, interactive=False)
|
468 |
+
bothbad_btn = gr.Button(
|
469 |
+
value="👎 Both are bad", visible=False, interactive=False
|
470 |
+
)
|
471 |
+
|
472 |
+
with gr.Row():
|
473 |
+
with gr.Column(scale=20):
|
474 |
+
textbox = gr.Textbox(
|
475 |
+
show_label=False,
|
476 |
+
placeholder="👉 Enter your prompt and press ENTER",
|
477 |
+
container=False,
|
478 |
+
elem_id="input_box",
|
479 |
+
)
|
480 |
+
with gr.Column(scale=1, min_width=50):
|
481 |
+
send_btn = gr.Button(value="Send", variant="primary")
|
482 |
+
|
483 |
+
with gr.Row() as button_row:
|
484 |
+
clear_btn = gr.Button(value="🎲 New Round", interactive=False)
|
485 |
+
regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
|
486 |
+
share_btn = gr.Button(value="📷 Share")
|
487 |
+
|
488 |
+
with gr.Accordion("Parameters", open=False) as parameter_row:
|
489 |
+
temperature = gr.Slider(
|
490 |
+
minimum=0.0,
|
491 |
+
maximum=1.0,
|
492 |
+
value=0.7,
|
493 |
+
step=0.1,
|
494 |
+
interactive=True,
|
495 |
+
label="Temperature",
|
496 |
+
)
|
497 |
+
top_p = gr.Slider(
|
498 |
+
minimum=0.0,
|
499 |
+
maximum=1.0,
|
500 |
+
value=1.0,
|
501 |
+
step=0.1,
|
502 |
+
interactive=True,
|
503 |
+
label="Top P",
|
504 |
+
)
|
505 |
+
max_output_tokens = gr.Slider(
|
506 |
+
minimum=16,
|
507 |
+
maximum=1024,
|
508 |
+
value=512,
|
509 |
+
step=64,
|
510 |
+
interactive=True,
|
511 |
+
label="Max output tokens",
|
512 |
+
)
|
513 |
+
|
514 |
+
gr.Markdown(acknowledgment_md)
|
515 |
+
|
516 |
+
# Register listeners
|
517 |
+
btn_list = [
|
518 |
+
leftvote_btn,
|
519 |
+
rightvote_btn,
|
520 |
+
tie_btn,
|
521 |
+
bothbad_btn,
|
522 |
+
regenerate_btn,
|
523 |
+
clear_btn,
|
524 |
+
]
|
525 |
+
leftvote_btn.click(
|
526 |
+
leftvote_last_response,
|
527 |
+
states + model_selectors,
|
528 |
+
model_selectors + [textbox, leftvote_btn, rightvote_btn, tie_btn, bothbad_btn],
|
529 |
+
)
|
530 |
+
rightvote_btn.click(
|
531 |
+
rightvote_last_response,
|
532 |
+
states + model_selectors,
|
533 |
+
model_selectors + [textbox, leftvote_btn, rightvote_btn, tie_btn, bothbad_btn],
|
534 |
+
)
|
535 |
+
tie_btn.click(
|
536 |
+
tievote_last_response,
|
537 |
+
states + model_selectors,
|
538 |
+
model_selectors + [textbox, leftvote_btn, rightvote_btn, tie_btn, bothbad_btn],
|
539 |
+
)
|
540 |
+
bothbad_btn.click(
|
541 |
+
bothbad_vote_last_response,
|
542 |
+
states + model_selectors,
|
543 |
+
model_selectors + [textbox, leftvote_btn, rightvote_btn, tie_btn, bothbad_btn],
|
544 |
+
)
|
545 |
+
regenerate_btn.click(
|
546 |
+
regenerate, states, states + chatbots + [textbox] + btn_list
|
547 |
+
).then(
|
548 |
+
bot_response_multi,
|
549 |
+
states + [temperature, top_p, max_output_tokens],
|
550 |
+
states + chatbots + btn_list,
|
551 |
+
).then(
|
552 |
+
flash_buttons, [], btn_list
|
553 |
+
)
|
554 |
+
clear_btn.click(
|
555 |
+
clear_history,
|
556 |
+
None,
|
557 |
+
states + chatbots + model_selectors + [textbox] + btn_list + [slow_warning],
|
558 |
+
)
|
559 |
+
|
560 |
+
share_js = """
|
561 |
+
function (a, b, c, d) {
|
562 |
+
const captureElement = document.querySelector('#share-region-anony');
|
563 |
+
html2canvas(captureElement)
|
564 |
+
.then(canvas => {
|
565 |
+
canvas.style.display = 'none'
|
566 |
+
document.body.appendChild(canvas)
|
567 |
+
return canvas
|
568 |
+
})
|
569 |
+
.then(canvas => {
|
570 |
+
const image = canvas.toDataURL('image/png')
|
571 |
+
const a = document.createElement('a')
|
572 |
+
a.setAttribute('download', 'chatbot-arena.png')
|
573 |
+
a.setAttribute('href', image)
|
574 |
+
a.click()
|
575 |
+
canvas.remove()
|
576 |
+
});
|
577 |
+
return [a, b, c, d];
|
578 |
+
}
|
579 |
+
"""
|
580 |
+
share_btn.click(share_click, states + model_selectors, [], _js=share_js)
|
581 |
+
|
582 |
+
textbox.submit(
|
583 |
+
add_text,
|
584 |
+
states + model_selectors + [textbox],
|
585 |
+
states + chatbots + [textbox] + btn_list + [slow_warning],
|
586 |
+
).then(
|
587 |
+
bot_response_multi,
|
588 |
+
states + [temperature, top_p, max_output_tokens],
|
589 |
+
states + chatbots + btn_list,
|
590 |
+
).then(
|
591 |
+
flash_buttons,
|
592 |
+
[],
|
593 |
+
btn_list,
|
594 |
+
)
|
595 |
+
|
596 |
+
send_btn.click(
|
597 |
+
add_text,
|
598 |
+
states + model_selectors + [textbox],
|
599 |
+
states + chatbots + [textbox] + btn_list,
|
600 |
+
).then(
|
601 |
+
bot_response_multi,
|
602 |
+
states + [temperature, top_p, max_output_tokens],
|
603 |
+
states + chatbots + btn_list,
|
604 |
+
).then(
|
605 |
+
flash_buttons, [], btn_list
|
606 |
+
)
|
607 |
+
|
608 |
+
return states + model_selectors
|
gradio_block_arena_named.py
ADDED
@@ -0,0 +1,458 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Chatbot Arena (side-by-side) tab.
|
3 |
+
Users chat with two chosen models.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import json
|
7 |
+
import time
|
8 |
+
|
9 |
+
import gradio as gr
|
10 |
+
import numpy as np
|
11 |
+
|
12 |
+
from fastchat.constants import (
|
13 |
+
MODERATION_MSG,
|
14 |
+
CONVERSATION_LIMIT_MSG,
|
15 |
+
INPUT_CHAR_LEN_LIMIT,
|
16 |
+
CONVERSATION_TURN_LIMIT,
|
17 |
+
)
|
18 |
+
from fastchat.model.model_adapter import get_conversation_template
|
19 |
+
from fastchat.serve.gradio_web_server import (
|
20 |
+
State,
|
21 |
+
bot_response,
|
22 |
+
get_conv_log_filename,
|
23 |
+
no_change_btn,
|
24 |
+
enable_btn,
|
25 |
+
disable_btn,
|
26 |
+
invisible_btn,
|
27 |
+
acknowledgment_md,
|
28 |
+
get_model_description_md,
|
29 |
+
ip_expiration_dict,
|
30 |
+
get_ip,
|
31 |
+
)
|
32 |
+
from fastchat.utils import (
|
33 |
+
build_logger,
|
34 |
+
moderation_filter,
|
35 |
+
)
|
36 |
+
|
37 |
+
|
38 |
+
logger = build_logger("gradio_web_server_multi", "gradio_web_server_multi.log")
|
39 |
+
|
40 |
+
num_sides = 2
|
41 |
+
enable_moderation = False
|
42 |
+
|
43 |
+
|
44 |
+
def set_global_vars_named(enable_moderation_):
|
45 |
+
global enable_moderation
|
46 |
+
enable_moderation = enable_moderation_
|
47 |
+
|
48 |
+
|
49 |
+
def load_demo_side_by_side_named(models, url_params):
|
50 |
+
states = (None,) * num_sides
|
51 |
+
|
52 |
+
model_left = models[0] if len(models) > 0 else ""
|
53 |
+
if len(models) > 1:
|
54 |
+
weights = ([8] * 4 + [4] * 8 + [1] * 32)[: len(models) - 1]
|
55 |
+
weights = weights / np.sum(weights)
|
56 |
+
model_right = np.random.choice(models[1:], p=weights)
|
57 |
+
else:
|
58 |
+
model_right = model_left
|
59 |
+
|
60 |
+
selector_updates = (
|
61 |
+
gr.Dropdown.update(choices=models, value=model_left, visible=True),
|
62 |
+
gr.Dropdown.update(choices=models, value=model_right, visible=True),
|
63 |
+
)
|
64 |
+
|
65 |
+
return states + selector_updates
|
66 |
+
|
67 |
+
|
68 |
+
def vote_last_response(states, vote_type, model_selectors, request: gr.Request):
|
69 |
+
with open(get_conv_log_filename(), "a") as fout:
|
70 |
+
data = {
|
71 |
+
"tstamp": round(time.time(), 4),
|
72 |
+
"type": vote_type,
|
73 |
+
"models": [x for x in model_selectors],
|
74 |
+
"states": [x.dict() for x in states],
|
75 |
+
"ip": get_ip(request),
|
76 |
+
}
|
77 |
+
fout.write(json.dumps(data) + "\n")
|
78 |
+
|
79 |
+
|
80 |
+
def leftvote_last_response(
|
81 |
+
state0, state1, model_selector0, model_selector1, request: gr.Request
|
82 |
+
):
|
83 |
+
logger.info(f"leftvote (named). ip: {get_ip(request)}")
|
84 |
+
vote_last_response(
|
85 |
+
[state0, state1], "leftvote", [model_selector0, model_selector1], request
|
86 |
+
)
|
87 |
+
return ("",) + (disable_btn,) * 4
|
88 |
+
|
89 |
+
|
90 |
+
def rightvote_last_response(
|
91 |
+
state0, state1, model_selector0, model_selector1, request: gr.Request
|
92 |
+
):
|
93 |
+
logger.info(f"rightvote (named). ip: {get_ip(request)}")
|
94 |
+
vote_last_response(
|
95 |
+
[state0, state1], "rightvote", [model_selector0, model_selector1], request
|
96 |
+
)
|
97 |
+
return ("",) + (disable_btn,) * 4
|
98 |
+
|
99 |
+
|
100 |
+
def tievote_last_response(
|
101 |
+
state0, state1, model_selector0, model_selector1, request: gr.Request
|
102 |
+
):
|
103 |
+
logger.info(f"tievote (named). ip: {get_ip(request)}")
|
104 |
+
vote_last_response(
|
105 |
+
[state0, state1], "tievote", [model_selector0, model_selector1], request
|
106 |
+
)
|
107 |
+
return ("",) + (disable_btn,) * 4
|
108 |
+
|
109 |
+
|
110 |
+
def bothbad_vote_last_response(
|
111 |
+
state0, state1, model_selector0, model_selector1, request: gr.Request
|
112 |
+
):
|
113 |
+
logger.info(f"bothbad_vote (named). ip: {get_ip(request)}")
|
114 |
+
vote_last_response(
|
115 |
+
[state0, state1], "bothbad_vote", [model_selector0, model_selector1], request
|
116 |
+
)
|
117 |
+
return ("",) + (disable_btn,) * 4
|
118 |
+
|
119 |
+
|
120 |
+
def regenerate(state0, state1, request: gr.Request):
|
121 |
+
logger.info(f"regenerate (named). ip: {get_ip(request)}")
|
122 |
+
states = [state0, state1]
|
123 |
+
for i in range(num_sides):
|
124 |
+
states[i].conv.update_last_message(None)
|
125 |
+
return states + [x.to_gradio_chatbot() for x in states] + [""] + [disable_btn] * 6
|
126 |
+
|
127 |
+
|
128 |
+
def clear_history(request: gr.Request):
|
129 |
+
logger.info(f"clear_history (named). ip: {get_ip(request)}")
|
130 |
+
return (
|
131 |
+
[None] * num_sides
|
132 |
+
+ [None] * num_sides
|
133 |
+
+ [""]
|
134 |
+
+ [invisible_btn] * 4
|
135 |
+
+ [disable_btn] * 2
|
136 |
+
)
|
137 |
+
|
138 |
+
|
139 |
+
def share_click(state0, state1, model_selector0, model_selector1, request: gr.Request):
|
140 |
+
logger.info(f"share (named). ip: {get_ip(request)}")
|
141 |
+
if state0 is not None and state1 is not None:
|
142 |
+
vote_last_response(
|
143 |
+
[state0, state1], "share", [model_selector0, model_selector1], request
|
144 |
+
)
|
145 |
+
|
146 |
+
|
147 |
+
def add_text(
|
148 |
+
state0, state1, model_selector0, model_selector1, text, request: gr.Request
|
149 |
+
):
|
150 |
+
ip = get_ip(request)
|
151 |
+
logger.info(f"add_text (named). ip: {ip}. len: {len(text)}")
|
152 |
+
states = [state0, state1]
|
153 |
+
model_selectors = [model_selector0, model_selector1]
|
154 |
+
|
155 |
+
# Init states if necessary
|
156 |
+
for i in range(num_sides):
|
157 |
+
if states[i] is None:
|
158 |
+
states[i] = State(model_selectors[i])
|
159 |
+
|
160 |
+
if len(text) <= 0:
|
161 |
+
for i in range(num_sides):
|
162 |
+
states[i].skip_next = True
|
163 |
+
return (
|
164 |
+
states
|
165 |
+
+ [x.to_gradio_chatbot() for x in states]
|
166 |
+
+ [""]
|
167 |
+
+ [
|
168 |
+
no_change_btn,
|
169 |
+
]
|
170 |
+
* 6
|
171 |
+
)
|
172 |
+
|
173 |
+
model_list = [states[i].model_name for i in range(num_sides)]
|
174 |
+
flagged = moderation_filter(text, model_list)
|
175 |
+
if flagged:
|
176 |
+
logger.info(f"violate moderation (named). ip: {ip}. text: {text}")
|
177 |
+
# overwrite the original text
|
178 |
+
text = MODERATION_MSG
|
179 |
+
|
180 |
+
conv = states[0].conv
|
181 |
+
if (len(conv.messages) - conv.offset) // 2 >= CONVERSATION_TURN_LIMIT:
|
182 |
+
logger.info(f"conversation turn limit. ip: {ip}. text: {text}")
|
183 |
+
for i in range(num_sides):
|
184 |
+
states[i].skip_next = True
|
185 |
+
return (
|
186 |
+
states
|
187 |
+
+ [x.to_gradio_chatbot() for x in states]
|
188 |
+
+ [CONVERSATION_LIMIT_MSG]
|
189 |
+
+ [
|
190 |
+
no_change_btn,
|
191 |
+
]
|
192 |
+
* 6
|
193 |
+
)
|
194 |
+
|
195 |
+
text = text[:INPUT_CHAR_LEN_LIMIT] # Hard cut-off
|
196 |
+
for i in range(num_sides):
|
197 |
+
states[i].conv.append_message(states[i].conv.roles[0], text)
|
198 |
+
states[i].conv.append_message(states[i].conv.roles[1], None)
|
199 |
+
states[i].skip_next = False
|
200 |
+
|
201 |
+
return (
|
202 |
+
states
|
203 |
+
+ [x.to_gradio_chatbot() for x in states]
|
204 |
+
+ [""]
|
205 |
+
+ [
|
206 |
+
disable_btn,
|
207 |
+
]
|
208 |
+
* 6
|
209 |
+
)
|
210 |
+
|
211 |
+
|
212 |
+
def bot_response_multi(
|
213 |
+
state0,
|
214 |
+
state1,
|
215 |
+
temperature,
|
216 |
+
top_p,
|
217 |
+
max_new_tokens,
|
218 |
+
request: gr.Request,
|
219 |
+
):
|
220 |
+
logger.info(f"bot_response_multi (named). ip: {get_ip(request)}")
|
221 |
+
|
222 |
+
if state0.skip_next:
|
223 |
+
# This generate call is skipped due to invalid inputs
|
224 |
+
yield (
|
225 |
+
state0,
|
226 |
+
state1,
|
227 |
+
state0.to_gradio_chatbot(),
|
228 |
+
state1.to_gradio_chatbot(),
|
229 |
+
) + (no_change_btn,) * 6
|
230 |
+
return
|
231 |
+
|
232 |
+
states = [state0, state1]
|
233 |
+
gen = []
|
234 |
+
for i in range(num_sides):
|
235 |
+
gen.append(
|
236 |
+
bot_response(
|
237 |
+
states[i],
|
238 |
+
temperature,
|
239 |
+
top_p,
|
240 |
+
max_new_tokens,
|
241 |
+
request,
|
242 |
+
)
|
243 |
+
)
|
244 |
+
|
245 |
+
chatbots = [None] * num_sides
|
246 |
+
while True:
|
247 |
+
stop = True
|
248 |
+
for i in range(num_sides):
|
249 |
+
try:
|
250 |
+
ret = next(gen[i])
|
251 |
+
states[i], chatbots[i] = ret[0], ret[1]
|
252 |
+
stop = False
|
253 |
+
except StopIteration:
|
254 |
+
pass
|
255 |
+
yield states + chatbots + [disable_btn] * 6
|
256 |
+
if stop:
|
257 |
+
break
|
258 |
+
|
259 |
+
|
260 |
+
def flash_buttons():
|
261 |
+
btn_updates = [
|
262 |
+
[disable_btn] * 4 + [enable_btn] * 2,
|
263 |
+
[enable_btn] * 6,
|
264 |
+
]
|
265 |
+
for i in range(4):
|
266 |
+
yield btn_updates[i % 2]
|
267 |
+
time.sleep(0.5)
|
268 |
+
|
269 |
+
|
270 |
+
def build_side_by_side_ui_named(models):
|
271 |
+
notice_markdown = """
|
272 |
+
# ⚔️ Chatbot Arena ⚔️ : Benchmarking LLMs in the Wild
|
273 |
+
| [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
|
274 |
+
|
275 |
+
## 📜 Rules
|
276 |
+
- Chat with any two models side-by-side and vote!
|
277 |
+
- You can continue chatting for multiple rounds.
|
278 |
+
- Click "Clear history" to start a new round.
|
279 |
+
|
280 |
+
## 🤖 Choose two models to compare
|
281 |
+
"""
|
282 |
+
|
283 |
+
states = [gr.State() for _ in range(num_sides)]
|
284 |
+
model_selectors = [None] * num_sides
|
285 |
+
chatbots = [None] * num_sides
|
286 |
+
|
287 |
+
model_description_md = get_model_description_md(models)
|
288 |
+
notice = gr.Markdown(
|
289 |
+
notice_markdown + model_description_md, elem_id="notice_markdown"
|
290 |
+
)
|
291 |
+
|
292 |
+
with gr.Box(elem_id="share-region-named"):
|
293 |
+
with gr.Row():
|
294 |
+
for i in range(num_sides):
|
295 |
+
with gr.Column():
|
296 |
+
model_selectors[i] = gr.Dropdown(
|
297 |
+
choices=models,
|
298 |
+
value=models[i] if len(models) > i else "",
|
299 |
+
interactive=True,
|
300 |
+
show_label=False,
|
301 |
+
container=False,
|
302 |
+
)
|
303 |
+
|
304 |
+
with gr.Row():
|
305 |
+
for i in range(num_sides):
|
306 |
+
label = "Model A" if i == 0 else "Model B"
|
307 |
+
with gr.Column():
|
308 |
+
chatbots[i] = gr.Chatbot(
|
309 |
+
label=label, elem_id=f"chatbot", height=550
|
310 |
+
)
|
311 |
+
|
312 |
+
with gr.Row():
|
313 |
+
leftvote_btn = gr.Button(
|
314 |
+
value="👈 A is better", visible=False, interactive=False
|
315 |
+
)
|
316 |
+
rightvote_btn = gr.Button(
|
317 |
+
value="👉 B is better", visible=False, interactive=False
|
318 |
+
)
|
319 |
+
tie_btn = gr.Button(value="🤝 Tie", visible=False, interactive=False)
|
320 |
+
bothbad_btn = gr.Button(
|
321 |
+
value="👎 Both are bad", visible=False, interactive=False
|
322 |
+
)
|
323 |
+
|
324 |
+
with gr.Row():
|
325 |
+
with gr.Column(scale=20):
|
326 |
+
textbox = gr.Textbox(
|
327 |
+
show_label=False,
|
328 |
+
placeholder="Enter your prompt here and press ENTER",
|
329 |
+
container=False,
|
330 |
+
elem_id="input_box",
|
331 |
+
)
|
332 |
+
with gr.Column(scale=1, min_width=50):
|
333 |
+
send_btn = gr.Button(value="Send", variant="primary")
|
334 |
+
|
335 |
+
with gr.Row() as button_row:
|
336 |
+
regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
|
337 |
+
clear_btn = gr.Button(value="🗑️ Clear history", interactive=False)
|
338 |
+
share_btn = gr.Button(value="📷 Share")
|
339 |
+
|
340 |
+
with gr.Accordion("Parameters", open=False) as parameter_row:
|
341 |
+
temperature = gr.Slider(
|
342 |
+
minimum=0.0,
|
343 |
+
maximum=1.0,
|
344 |
+
value=0.7,
|
345 |
+
step=0.1,
|
346 |
+
interactive=True,
|
347 |
+
label="Temperature",
|
348 |
+
)
|
349 |
+
top_p = gr.Slider(
|
350 |
+
minimum=0.0,
|
351 |
+
maximum=1.0,
|
352 |
+
value=1.0,
|
353 |
+
step=0.1,
|
354 |
+
interactive=True,
|
355 |
+
label="Top P",
|
356 |
+
)
|
357 |
+
max_output_tokens = gr.Slider(
|
358 |
+
minimum=16,
|
359 |
+
maximum=1024,
|
360 |
+
value=512,
|
361 |
+
step=64,
|
362 |
+
interactive=True,
|
363 |
+
label="Max output tokens",
|
364 |
+
)
|
365 |
+
|
366 |
+
gr.Markdown(acknowledgment_md)
|
367 |
+
|
368 |
+
# Register listeners
|
369 |
+
btn_list = [
|
370 |
+
leftvote_btn,
|
371 |
+
rightvote_btn,
|
372 |
+
tie_btn,
|
373 |
+
bothbad_btn,
|
374 |
+
regenerate_btn,
|
375 |
+
clear_btn,
|
376 |
+
]
|
377 |
+
leftvote_btn.click(
|
378 |
+
leftvote_last_response,
|
379 |
+
states + model_selectors,
|
380 |
+
[textbox, leftvote_btn, rightvote_btn, tie_btn, bothbad_btn],
|
381 |
+
)
|
382 |
+
rightvote_btn.click(
|
383 |
+
rightvote_last_response,
|
384 |
+
states + model_selectors,
|
385 |
+
[textbox, leftvote_btn, rightvote_btn, tie_btn, bothbad_btn],
|
386 |
+
)
|
387 |
+
tie_btn.click(
|
388 |
+
tievote_last_response,
|
389 |
+
states + model_selectors,
|
390 |
+
[textbox, leftvote_btn, rightvote_btn, tie_btn, bothbad_btn],
|
391 |
+
)
|
392 |
+
bothbad_btn.click(
|
393 |
+
bothbad_vote_last_response,
|
394 |
+
states + model_selectors,
|
395 |
+
[textbox, leftvote_btn, rightvote_btn, tie_btn, bothbad_btn],
|
396 |
+
)
|
397 |
+
regenerate_btn.click(
|
398 |
+
regenerate, states, states + chatbots + [textbox] + btn_list
|
399 |
+
).then(
|
400 |
+
bot_response_multi,
|
401 |
+
states + [temperature, top_p, max_output_tokens],
|
402 |
+
states + chatbots + btn_list,
|
403 |
+
).then(
|
404 |
+
flash_buttons, [], btn_list
|
405 |
+
)
|
406 |
+
clear_btn.click(clear_history, None, states + chatbots + [textbox] + btn_list)
|
407 |
+
|
408 |
+
share_js = """
|
409 |
+
function (a, b, c, d) {
|
410 |
+
const captureElement = document.querySelector('#share-region-named');
|
411 |
+
html2canvas(captureElement)
|
412 |
+
.then(canvas => {
|
413 |
+
canvas.style.display = 'none'
|
414 |
+
document.body.appendChild(canvas)
|
415 |
+
return canvas
|
416 |
+
})
|
417 |
+
.then(canvas => {
|
418 |
+
const image = canvas.toDataURL('image/png')
|
419 |
+
const a = document.createElement('a')
|
420 |
+
a.setAttribute('download', 'chatbot-arena.png')
|
421 |
+
a.setAttribute('href', image)
|
422 |
+
a.click()
|
423 |
+
canvas.remove()
|
424 |
+
});
|
425 |
+
return [a, b, c, d];
|
426 |
+
}
|
427 |
+
"""
|
428 |
+
share_btn.click(share_click, states + model_selectors, [], _js=share_js)
|
429 |
+
|
430 |
+
for i in range(num_sides):
|
431 |
+
model_selectors[i].change(
|
432 |
+
clear_history, None, states + chatbots + [textbox] + btn_list
|
433 |
+
)
|
434 |
+
|
435 |
+
textbox.submit(
|
436 |
+
add_text,
|
437 |
+
states + model_selectors + [textbox],
|
438 |
+
states + chatbots + [textbox] + btn_list,
|
439 |
+
).then(
|
440 |
+
bot_response_multi,
|
441 |
+
states + [temperature, top_p, max_output_tokens],
|
442 |
+
states + chatbots + btn_list,
|
443 |
+
).then(
|
444 |
+
flash_buttons, [], btn_list
|
445 |
+
)
|
446 |
+
send_btn.click(
|
447 |
+
add_text,
|
448 |
+
states + model_selectors + [textbox],
|
449 |
+
states + chatbots + [textbox] + btn_list,
|
450 |
+
).then(
|
451 |
+
bot_response_multi,
|
452 |
+
states + [temperature, top_p, max_output_tokens],
|
453 |
+
states + chatbots + btn_list,
|
454 |
+
).then(
|
455 |
+
flash_buttons, [], btn_list
|
456 |
+
)
|
457 |
+
|
458 |
+
return states + model_selectors
|
gradio_web_server.py
ADDED
@@ -0,0 +1,883 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
The gradio demo server for chatting with a single model.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import argparse
|
6 |
+
from collections import defaultdict
|
7 |
+
import datetime
|
8 |
+
import json
|
9 |
+
import os
|
10 |
+
import random
|
11 |
+
import time
|
12 |
+
import uuid
|
13 |
+
|
14 |
+
import gradio as gr
|
15 |
+
import requests
|
16 |
+
|
17 |
+
from fastchat.conversation import SeparatorStyle
|
18 |
+
from fastchat.constants import (
|
19 |
+
LOGDIR,
|
20 |
+
WORKER_API_TIMEOUT,
|
21 |
+
ErrorCode,
|
22 |
+
MODERATION_MSG,
|
23 |
+
CONVERSATION_LIMIT_MSG,
|
24 |
+
SERVER_ERROR_MSG,
|
25 |
+
INPUT_CHAR_LEN_LIMIT,
|
26 |
+
CONVERSATION_TURN_LIMIT,
|
27 |
+
SESSION_EXPIRATION_TIME,
|
28 |
+
)
|
29 |
+
from fastchat.model.model_adapter import get_conversation_template
|
30 |
+
from fastchat.conversation import get_conv_template
|
31 |
+
from fastchat.model.model_registry import get_model_info, model_info
|
32 |
+
from fastchat.serve.api_provider import (
|
33 |
+
anthropic_api_stream_iter,
|
34 |
+
openai_api_stream_iter,
|
35 |
+
palm_api_stream_iter,
|
36 |
+
init_palm_chat,
|
37 |
+
)
|
38 |
+
from fastchat.utils import (
|
39 |
+
build_logger,
|
40 |
+
moderation_filter,
|
41 |
+
get_window_url_params_js,
|
42 |
+
get_window_url_params_with_tos_js,
|
43 |
+
parse_gradio_auth_creds,
|
44 |
+
)
|
45 |
+
|
46 |
+
CONV_TEMPLATE = ''
|
47 |
+
|
48 |
+
logger = build_logger("gradio_web_server", "gradio_web_server.log")
|
49 |
+
|
50 |
+
headers = {"User-Agent": "FastChat Client"}
|
51 |
+
|
52 |
+
no_change_btn = gr.Button.update()
|
53 |
+
enable_btn = gr.Button.update(interactive=True, visible=True)
|
54 |
+
disable_btn = gr.Button.update(interactive=False)
|
55 |
+
invisible_btn = gr.Button.update(interactive=False, visible=False)
|
56 |
+
|
57 |
+
controller_url = None
|
58 |
+
enable_moderation = False
|
59 |
+
|
60 |
+
acknowledgment_md = """
|
61 |
+
### Acknowledgment
|
62 |
+
<div class="image-container">
|
63 |
+
<p> We thank <a href="https://www.kaggle.com/" target="_blank">Kaggle</a>, <a href="https://mbzuai.ac.ae/" target="_blank">MBZUAI</a>, <a href="https://www.anyscale.com/" target="_blank">AnyScale</a>, and <a href="https://huggingface.co/" target="_blank">HuggingFace</a> for their <a href="https://lmsys.org/donations/" target="_blank">sponsorship</a>. </p>
|
64 |
+
<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/7/7c/Kaggle_logo.png/400px-Kaggle_logo.png" alt="Image 1">
|
65 |
+
<img src="https://mma.prnewswire.com/media/1227419/MBZUAI_Logo.jpg?p=facebookg" alt="Image 2">
|
66 |
+
<img src="https://docs.anyscale.com/site-assets/logo.png" alt="Image 3">
|
67 |
+
<img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo-with-title.png" alt="Image 4">
|
68 |
+
</div>
|
69 |
+
"""
|
70 |
+
|
71 |
+
ip_expiration_dict = defaultdict(lambda: 0)
|
72 |
+
|
73 |
+
# Information about custom OpenAI compatible API models.
|
74 |
+
# JSON file format:
|
75 |
+
# {
|
76 |
+
# "vicuna-7b": {
|
77 |
+
# "model_name": "vicuna-7b-v1.5",
|
78 |
+
# "api_base": "http://8.8.8.55:5555/v1",
|
79 |
+
# "api_key": "password"
|
80 |
+
# },
|
81 |
+
# }
|
82 |
+
openai_compatible_models_info = {}
|
83 |
+
|
84 |
+
|
85 |
+
class State:
|
86 |
+
def __init__(self, model_name):
|
87 |
+
# if model_name=='checkpoint-800':
|
88 |
+
# self.conv = get_conv_template(CONV_TEMPLATE)
|
89 |
+
# elif model_name=='MiniCPM-2B-sft-bf16':
|
90 |
+
ret = requests.post(
|
91 |
+
controller_url + "/get_worker_address", json={"model": model_name}
|
92 |
+
)
|
93 |
+
worker_addr = ret.json()["address"]
|
94 |
+
conv_name = requests.post(
|
95 |
+
worker_addr + "/worker_get_conv_template",
|
96 |
+
).json()['conv']['name']
|
97 |
+
self.conv = get_conv_template(conv_name)
|
98 |
+
# self.conv = get_conv_template('minicpm')
|
99 |
+
# print(self.conv)
|
100 |
+
# self.conv = get_conversation_template(model_name)
|
101 |
+
self.conv_id = uuid.uuid4().hex
|
102 |
+
self.skip_next = False
|
103 |
+
self.model_name = model_name
|
104 |
+
|
105 |
+
if model_name == "palm-2":
|
106 |
+
# According to release note, "chat-bison@001" is PaLM 2 for chat.
|
107 |
+
# https://cloud.google.com/vertex-ai/docs/release-notes#May_10_2023
|
108 |
+
self.palm_chat = init_palm_chat("chat-bison@001")
|
109 |
+
|
110 |
+
def to_gradio_chatbot(self):
|
111 |
+
return self.conv.to_gradio_chatbot()
|
112 |
+
|
113 |
+
def dict(self):
|
114 |
+
base = self.conv.dict()
|
115 |
+
base.update(
|
116 |
+
{
|
117 |
+
"conv_id": self.conv_id,
|
118 |
+
"model_name": self.model_name,
|
119 |
+
}
|
120 |
+
)
|
121 |
+
return base
|
122 |
+
|
123 |
+
|
124 |
+
def set_global_vars(controller_url_, enable_moderation_):
|
125 |
+
global controller_url, enable_moderation
|
126 |
+
controller_url = controller_url_
|
127 |
+
enable_moderation = enable_moderation_
|
128 |
+
|
129 |
+
|
130 |
+
def get_conv_log_filename():
|
131 |
+
t = datetime.datetime.now()
|
132 |
+
name = os.path.join(LOGDIR, f"{t.year}-{t.month:02d}-{t.day:02d}-conv.json")
|
133 |
+
return name
|
134 |
+
|
135 |
+
|
136 |
+
def get_model_list(
|
137 |
+
controller_url, register_openai_compatible_models, add_chatgpt, add_claude, add_palm
|
138 |
+
):
|
139 |
+
if controller_url:
|
140 |
+
ret = requests.post(controller_url + "/refresh_all_workers")
|
141 |
+
assert ret.status_code == 200
|
142 |
+
ret = requests.post(controller_url + "/list_models")
|
143 |
+
# ret = requests.post(controller_url + "/get_worker_address")
|
144 |
+
# ret = requests.post(controller_url + "/worker_get_status")
|
145 |
+
models = ret.json()["models"]
|
146 |
+
else:
|
147 |
+
models = []
|
148 |
+
|
149 |
+
# Add API providers
|
150 |
+
if register_openai_compatible_models:
|
151 |
+
global openai_compatible_models_info
|
152 |
+
openai_compatible_models_info = json.load(
|
153 |
+
open(register_openai_compatible_models)
|
154 |
+
)
|
155 |
+
models += list(openai_compatible_models_info.keys())
|
156 |
+
|
157 |
+
if add_chatgpt:
|
158 |
+
models += ["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo-1106"]
|
159 |
+
if add_claude:
|
160 |
+
models += ["claude-2", "claude-instant-1"]
|
161 |
+
if add_palm:
|
162 |
+
models += ["palm-2"]
|
163 |
+
models = list(set(models))
|
164 |
+
|
165 |
+
if "deluxe-chat-v1" in models:
|
166 |
+
del models[models.index("deluxe-chat-v1")]
|
167 |
+
if "deluxe-chat-v1.1" in models:
|
168 |
+
del models[models.index("deluxe-chat-v1.1")]
|
169 |
+
|
170 |
+
priority = {k: f"___{i:02d}" for i, k in enumerate(model_info)}
|
171 |
+
models.sort(key=lambda x: priority.get(x, x))
|
172 |
+
logger.info(f"Models: {models}")
|
173 |
+
return models
|
174 |
+
|
175 |
+
|
176 |
+
def load_demo_single(models, url_params):
|
177 |
+
selected_model = models[0] if len(models) > 0 else ""
|
178 |
+
if "model" in url_params:
|
179 |
+
model = url_params["model"]
|
180 |
+
if model in models:
|
181 |
+
selected_model = model
|
182 |
+
|
183 |
+
dropdown_update = gr.Dropdown.update(
|
184 |
+
choices=models, value=selected_model, visible=True
|
185 |
+
)
|
186 |
+
|
187 |
+
state = None
|
188 |
+
return state, dropdown_update
|
189 |
+
|
190 |
+
|
191 |
+
def load_demo(url_params, request: gr.Request):
|
192 |
+
global models
|
193 |
+
|
194 |
+
ip = get_ip(request)
|
195 |
+
logger.info(f"load_demo. ip: {ip}. params: {url_params}")
|
196 |
+
ip_expiration_dict[ip] = time.time() + SESSION_EXPIRATION_TIME
|
197 |
+
|
198 |
+
if args.model_list_mode == "reload":
|
199 |
+
models = get_model_list(
|
200 |
+
controller_url,
|
201 |
+
args.register_openai_compatible_models,
|
202 |
+
args.add_chatgpt,
|
203 |
+
args.add_claude,
|
204 |
+
args.add_palm,
|
205 |
+
)
|
206 |
+
|
207 |
+
return load_demo_single(models, url_params)
|
208 |
+
|
209 |
+
|
210 |
+
def vote_last_response(state, vote_type, model_selector, request: gr.Request):
|
211 |
+
with open('./web_chat_downvote.jsonl', "a+") as fout:
|
212 |
+
# data = {
|
213 |
+
# "tstamp": round(time.time(), 4),
|
214 |
+
# "type": vote_type,
|
215 |
+
# "model": model_selector,
|
216 |
+
# "state": state.dict(),
|
217 |
+
# "ip": get_ip(request),
|
218 |
+
# }
|
219 |
+
conversations = []
|
220 |
+
for i, turn in enumerate(state.dict()['messages']):
|
221 |
+
role = 'user' if i % 2 == 0 else 'assistant'
|
222 |
+
conversations.append({'role': role, 'content': turn[1]})
|
223 |
+
data = {
|
224 |
+
'conversations': conversations,
|
225 |
+
'idx': state.dict()['conv_id'],
|
226 |
+
'tinder': 'badcase',
|
227 |
+
'model': state.dict()['model_name'],
|
228 |
+
'tokens_in': -1,
|
229 |
+
'tokens_out': -1,
|
230 |
+
}
|
231 |
+
fout.write(json.dumps(data, ensure_ascii=False) + "\n")
|
232 |
+
|
233 |
+
|
234 |
+
def upvote_last_response(state, model_selector, request: gr.Request):
|
235 |
+
ip = get_ip(request)
|
236 |
+
logger.info(f"upvote. ip: {ip}")
|
237 |
+
vote_last_response(state, "upvote", model_selector, request)
|
238 |
+
return ("",) + (disable_btn,) * 3
|
239 |
+
|
240 |
+
|
241 |
+
def downvote_last_response(state, model_selector, request: gr.Request):
|
242 |
+
ip = get_ip(request)
|
243 |
+
logger.info(f"downvote. ip: {ip}")
|
244 |
+
vote_last_response(state, "downvote", model_selector, request)
|
245 |
+
return ("",) + (disable_btn,) * 3
|
246 |
+
|
247 |
+
|
248 |
+
def flag_last_response(state, model_selector, request: gr.Request):
|
249 |
+
ip = get_ip(request)
|
250 |
+
logger.info(f"flag. ip: {ip}")
|
251 |
+
vote_last_response(state, "flag", model_selector, request)
|
252 |
+
return ("",) + (disable_btn,) * 3
|
253 |
+
|
254 |
+
|
255 |
+
def regenerate(state, request: gr.Request):
|
256 |
+
ip = get_ip(request)
|
257 |
+
logger.info(f"regenerate. ip: {ip}")
|
258 |
+
state.conv.update_last_message(None)
|
259 |
+
return (state, state.to_gradio_chatbot(), "") + (disable_btn,) * 5
|
260 |
+
|
261 |
+
|
262 |
+
def clear_history(request: gr.Request):
|
263 |
+
ip = get_ip(request)
|
264 |
+
logger.info(f"clear_history. ip: {ip}")
|
265 |
+
state = None
|
266 |
+
return (state, [], "") + (disable_btn,) * 5
|
267 |
+
|
268 |
+
|
269 |
+
def get_ip(request: gr.Request):
|
270 |
+
if "cf-connecting-ip" in request.headers:
|
271 |
+
ip = request.headers["cf-connecting-ip"]
|
272 |
+
else:
|
273 |
+
ip = request.client.host
|
274 |
+
return ip
|
275 |
+
|
276 |
+
|
277 |
+
def add_text(state, model_selector, text, request: gr.Request):
|
278 |
+
ip = get_ip(request)
|
279 |
+
logger.info(f"add_text. ip: {ip}. len: {len(text)}")
|
280 |
+
|
281 |
+
if state is None:
|
282 |
+
state = State(model_selector)
|
283 |
+
|
284 |
+
if len(text) <= 0:
|
285 |
+
state.skip_next = True
|
286 |
+
return (state, state.to_gradio_chatbot(), "") + (no_change_btn,) * 5
|
287 |
+
|
288 |
+
flagged = moderation_filter(text, [state.model_name])
|
289 |
+
if flagged:
|
290 |
+
logger.info(f"violate moderation. ip: {ip}. text: {text}")
|
291 |
+
# overwrite the original text
|
292 |
+
text = MODERATION_MSG
|
293 |
+
|
294 |
+
conv = state.conv
|
295 |
+
if (len(conv.messages) - conv.offset) // 2 >= CONVERSATION_TURN_LIMIT:
|
296 |
+
logger.info(f"conversation turn limit. ip: {ip}. text: {text}")
|
297 |
+
state.skip_next = True
|
298 |
+
return (state, state.to_gradio_chatbot(), CONVERSATION_LIMIT_MSG) + (
|
299 |
+
no_change_btn,
|
300 |
+
) * 5
|
301 |
+
|
302 |
+
text = text[:INPUT_CHAR_LEN_LIMIT] # Hard cut-off
|
303 |
+
conv.append_message(conv.roles[0], text)
|
304 |
+
conv.append_message(conv.roles[1], None)
|
305 |
+
return (state, state.to_gradio_chatbot(), "") + (disable_btn,) * 5
|
306 |
+
|
307 |
+
|
308 |
+
def post_process_code(code):
|
309 |
+
sep = "\n```"
|
310 |
+
if sep in code:
|
311 |
+
blocks = code.split(sep)
|
312 |
+
if len(blocks) % 2 == 1:
|
313 |
+
for i in range(1, len(blocks), 2):
|
314 |
+
blocks[i] = blocks[i].replace("\\_", "_")
|
315 |
+
code = sep.join(blocks)
|
316 |
+
return code
|
317 |
+
|
318 |
+
|
319 |
+
def model_worker_stream_iter(
|
320 |
+
conv,
|
321 |
+
model_name,
|
322 |
+
worker_addr,
|
323 |
+
prompt,
|
324 |
+
temperature,
|
325 |
+
repetition_penalty,
|
326 |
+
top_p,
|
327 |
+
max_new_tokens,
|
328 |
+
):
|
329 |
+
# Make requests
|
330 |
+
gen_params = {
|
331 |
+
"model": model_name,
|
332 |
+
"prompt": prompt,
|
333 |
+
"temperature": temperature,
|
334 |
+
"repetition_penalty": repetition_penalty,
|
335 |
+
"top_p": top_p,
|
336 |
+
"max_new_tokens": max_new_tokens,
|
337 |
+
"stop": conv.stop_str,
|
338 |
+
"stop_token_ids": conv.stop_token_ids,
|
339 |
+
"echo": False,
|
340 |
+
}
|
341 |
+
logger.info(f"==== request ====\n{gen_params}")
|
342 |
+
|
343 |
+
# Stream output
|
344 |
+
response = requests.post(
|
345 |
+
worker_addr + "/worker_generate_stream",
|
346 |
+
headers=headers,
|
347 |
+
json=gen_params,
|
348 |
+
stream=True,
|
349 |
+
timeout=WORKER_API_TIMEOUT,
|
350 |
+
)
|
351 |
+
for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
|
352 |
+
if chunk:
|
353 |
+
data = json.loads(chunk.decode())
|
354 |
+
yield data
|
355 |
+
|
356 |
+
|
357 |
+
def bot_response(state, temperature, top_p, max_new_tokens, request: gr.Request):
|
358 |
+
ip = get_ip(request)
|
359 |
+
logger.info(f"bot_response. ip: {ip}")
|
360 |
+
start_tstamp = time.time()
|
361 |
+
temperature = float(temperature)
|
362 |
+
top_p = float(top_p)
|
363 |
+
max_new_tokens = int(max_new_tokens)
|
364 |
+
|
365 |
+
if state.skip_next:
|
366 |
+
# This generate call is skipped due to invalid inputs
|
367 |
+
state.skip_next = False
|
368 |
+
yield (state, state.to_gradio_chatbot()) + (no_change_btn,) * 5
|
369 |
+
return
|
370 |
+
|
371 |
+
conv, model_name = state.conv, state.model_name
|
372 |
+
if model_name in ["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo-1106"]:
|
373 |
+
prompt = conv.to_openai_api_messages()
|
374 |
+
stream_iter = openai_api_stream_iter(
|
375 |
+
model_name, prompt, temperature, top_p, max_new_tokens
|
376 |
+
)
|
377 |
+
elif model_name in ["claude-2", "claude-1", "claude-instant-1"]:
|
378 |
+
prompt = conv.get_prompt()
|
379 |
+
stream_iter = anthropic_api_stream_iter(
|
380 |
+
model_name, prompt, temperature, top_p, max_new_tokens
|
381 |
+
)
|
382 |
+
elif model_name == "palm-2":
|
383 |
+
stream_iter = palm_api_stream_iter(
|
384 |
+
state.palm_chat, conv.messages[-2][1], temperature, top_p, max_new_tokens
|
385 |
+
)
|
386 |
+
elif model_name in openai_compatible_models_info:
|
387 |
+
model_info = openai_compatible_models_info[model_name]
|
388 |
+
prompt = conv.to_openai_api_messages()
|
389 |
+
stream_iter = openai_api_stream_iter(
|
390 |
+
model_info["model_name"],
|
391 |
+
prompt,
|
392 |
+
temperature,
|
393 |
+
top_p,
|
394 |
+
max_new_tokens,
|
395 |
+
api_base=model_info["api_base"],
|
396 |
+
api_key=model_info["api_key"],
|
397 |
+
)
|
398 |
+
else:
|
399 |
+
# Query worker address
|
400 |
+
ret = requests.post(
|
401 |
+
controller_url + "/get_worker_address", json={"model": model_name}
|
402 |
+
)
|
403 |
+
worker_addr = ret.json()["address"]
|
404 |
+
logger.info(f"model_name: {model_name}, worker_addr: {worker_addr}")
|
405 |
+
|
406 |
+
# No available worker
|
407 |
+
if worker_addr == "":
|
408 |
+
conv.update_last_message(SERVER_ERROR_MSG)
|
409 |
+
yield (
|
410 |
+
state,
|
411 |
+
state.to_gradio_chatbot(),
|
412 |
+
disable_btn,
|
413 |
+
disable_btn,
|
414 |
+
disable_btn,
|
415 |
+
enable_btn,
|
416 |
+
enable_btn,
|
417 |
+
)
|
418 |
+
return
|
419 |
+
|
420 |
+
# Construct prompt.
|
421 |
+
# We need to call it here, so it will not be affected by "▌".
|
422 |
+
prompt = conv.get_prompt()
|
423 |
+
# Set repetition_penalty
|
424 |
+
if "t5" in model_name:
|
425 |
+
repetition_penalty = 1.2
|
426 |
+
else:
|
427 |
+
repetition_penalty = 1.0
|
428 |
+
|
429 |
+
stream_iter = model_worker_stream_iter(
|
430 |
+
conv,
|
431 |
+
model_name,
|
432 |
+
worker_addr,
|
433 |
+
prompt,
|
434 |
+
temperature,
|
435 |
+
repetition_penalty,
|
436 |
+
top_p,
|
437 |
+
max_new_tokens,
|
438 |
+
)
|
439 |
+
|
440 |
+
conv.update_last_message("▌")
|
441 |
+
yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
|
442 |
+
|
443 |
+
try:
|
444 |
+
for i, data in enumerate(stream_iter):
|
445 |
+
if data["error_code"] == 0:
|
446 |
+
output = data["text"].strip()
|
447 |
+
conv.update_last_message(output + "▌")
|
448 |
+
yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
|
449 |
+
else:
|
450 |
+
output = data["text"] + f"\n\n(error_code: {data['error_code']})"
|
451 |
+
conv.update_last_message(output)
|
452 |
+
yield (state, state.to_gradio_chatbot()) + (
|
453 |
+
disable_btn,
|
454 |
+
disable_btn,
|
455 |
+
disable_btn,
|
456 |
+
enable_btn,
|
457 |
+
enable_btn,
|
458 |
+
)
|
459 |
+
return
|
460 |
+
output = data["text"].strip()
|
461 |
+
if "vicuna" in model_name:
|
462 |
+
output = post_process_code(output)
|
463 |
+
conv.update_last_message(output)
|
464 |
+
yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
|
465 |
+
except requests.exceptions.RequestException as e:
|
466 |
+
conv.update_last_message(
|
467 |
+
f"{SERVER_ERROR_MSG}\n\n"
|
468 |
+
f"(error_code: {ErrorCode.GRADIO_REQUEST_ERROR}, {e})"
|
469 |
+
)
|
470 |
+
yield (state, state.to_gradio_chatbot()) + (
|
471 |
+
disable_btn,
|
472 |
+
disable_btn,
|
473 |
+
disable_btn,
|
474 |
+
enable_btn,
|
475 |
+
enable_btn,
|
476 |
+
)
|
477 |
+
return
|
478 |
+
except Exception as e:
|
479 |
+
conv.update_last_message(
|
480 |
+
f"{SERVER_ERROR_MSG}\n\n"
|
481 |
+
f"(error_code: {ErrorCode.GRADIO_STREAM_UNKNOWN_ERROR}, {e})"
|
482 |
+
)
|
483 |
+
yield (state, state.to_gradio_chatbot()) + (
|
484 |
+
disable_btn,
|
485 |
+
disable_btn,
|
486 |
+
disable_btn,
|
487 |
+
enable_btn,
|
488 |
+
enable_btn,
|
489 |
+
)
|
490 |
+
return
|
491 |
+
|
492 |
+
finish_tstamp = time.time()
|
493 |
+
logger.info(f"{output}")
|
494 |
+
|
495 |
+
with open(get_conv_log_filename(), "a") as fout:
|
496 |
+
data = {
|
497 |
+
"tstamp": round(finish_tstamp, 4),
|
498 |
+
"type": "chat",
|
499 |
+
"model": model_name,
|
500 |
+
"gen_params": {
|
501 |
+
"temperature": temperature,
|
502 |
+
"top_p": top_p,
|
503 |
+
"max_new_tokens": max_new_tokens,
|
504 |
+
},
|
505 |
+
"start": round(start_tstamp, 4),
|
506 |
+
"finish": round(finish_tstamp, 4),
|
507 |
+
"state": state.dict(),
|
508 |
+
"ip": get_ip(request),
|
509 |
+
}
|
510 |
+
fout.write(json.dumps(data) + "\n")
|
511 |
+
|
512 |
+
|
513 |
+
block_css = """
|
514 |
+
#notice_markdown {
|
515 |
+
font-size: 110%
|
516 |
+
}
|
517 |
+
#notice_markdown th {
|
518 |
+
display: none;
|
519 |
+
}
|
520 |
+
#notice_markdown td {
|
521 |
+
padding-top: 6px;
|
522 |
+
padding-bottom: 6px;
|
523 |
+
}
|
524 |
+
#leaderboard_markdown {
|
525 |
+
font-size: 110%
|
526 |
+
}
|
527 |
+
#leaderboard_markdown td {
|
528 |
+
padding-top: 6px;
|
529 |
+
padding-bottom: 6px;
|
530 |
+
}
|
531 |
+
#leaderboard_dataframe td {
|
532 |
+
line-height: 0.1em;
|
533 |
+
}
|
534 |
+
#about_markdown {
|
535 |
+
font-size: 110%
|
536 |
+
}
|
537 |
+
#input_box textarea {
|
538 |
+
}
|
539 |
+
footer {
|
540 |
+
display:none !important
|
541 |
+
}
|
542 |
+
.image-container {
|
543 |
+
display: flex;
|
544 |
+
align-items: center;
|
545 |
+
padding: 1px;
|
546 |
+
}
|
547 |
+
.image-container img {
|
548 |
+
margin: 0 30px;
|
549 |
+
height: 20px;
|
550 |
+
max-height: 100%;
|
551 |
+
width: auto;
|
552 |
+
max-width: 20%;
|
553 |
+
}
|
554 |
+
.image-about img {
|
555 |
+
margin: 0 30px;
|
556 |
+
margin-top: 30px;
|
557 |
+
height: 60px;
|
558 |
+
max-height: 100%;
|
559 |
+
width: auto;
|
560 |
+
float: left;
|
561 |
+
}
|
562 |
+
"""
|
563 |
+
|
564 |
+
|
565 |
+
def get_model_description_md(models):
|
566 |
+
model_description_md = """
|
567 |
+
| | | |
|
568 |
+
| ---- | ---- | ---- |
|
569 |
+
"""
|
570 |
+
ct = 0
|
571 |
+
visited = set()
|
572 |
+
for i, name in enumerate(models):
|
573 |
+
minfo = get_model_info(name)
|
574 |
+
if minfo.simple_name in visited:
|
575 |
+
continue
|
576 |
+
visited.add(minfo.simple_name)
|
577 |
+
one_model_md = f"[{minfo.simple_name}]({minfo.link}): {minfo.description}"
|
578 |
+
|
579 |
+
if ct % 3 == 0:
|
580 |
+
model_description_md += "|"
|
581 |
+
model_description_md += f" {one_model_md} |"
|
582 |
+
if ct % 3 == 2:
|
583 |
+
model_description_md += "\n"
|
584 |
+
ct += 1
|
585 |
+
return model_description_md
|
586 |
+
|
587 |
+
|
588 |
+
def build_about():
|
589 |
+
about_markdown = f"""
|
590 |
+
# About Us
|
591 |
+
Chatbot Arena is an open-source research project developed by members from [LMSYS](https://lmsys.org/about/) and UC Berkeley [SkyLab](https://sky.cs.berkeley.edu/). Our mission is to build an open crowdsourced platform to collect human feedback and evaluate LLMs under real-world scenarios. We open-source our code at [GitHub](https://github.com/lm-sys/FastChat) and release chat and human feedback datasets [here](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md). We invite everyone to join us in this journey!
|
592 |
+
|
593 |
+
## Read More
|
594 |
+
- Chatbot Arena [launch post](https://lmsys.org/blog/2023-05-03-arena/), [data release](https://lmsys.org/blog/2023-07-20-dataset/)
|
595 |
+
- LMSYS-Chat-1M [report](https://arxiv.org/abs/2309.11998)
|
596 |
+
|
597 |
+
## Core Members
|
598 |
+
[Lianmin Zheng](https://lmzheng.net/), [Wei-Lin Chiang](https://infwinston.github.io/), [Ying Sheng](https://sites.google.com/view/yingsheng/home), [Siyuan Zhuang](https://scholar.google.com/citations?user=KSZmI5EAAAAJ)
|
599 |
+
|
600 |
+
## Advisors
|
601 |
+
[Ion Stoica](http://people.eecs.berkeley.edu/~istoica/), [Joseph E. Gonzalez](https://people.eecs.berkeley.edu/~jegonzal/), [Hao Zhang](https://cseweb.ucsd.edu/~haozhang/)
|
602 |
+
|
603 |
+
## Contact Us
|
604 |
+
- Follow our [Twitter](https://twitter.com/lmsysorg), [Discord](https://discord.gg/HSWAKCrnFx) or email us at [email protected]
|
605 |
+
- File issues on [GitHub](https://github.com/lm-sys/FastChat)
|
606 |
+
- Download our datasets and models on [HuggingFace](https://huggingface.co/lmsys)
|
607 |
+
|
608 |
+
## Sponsors
|
609 |
+
We thank [Kaggle](https://www.kaggle.com/), [MBZUAI](https://mbzuai.ac.ae/), [Anyscale](https://www.anyscale.com/), [HuggingFace](https://huggingface.co/) for their generous sponsorship.
|
610 |
+
Learn more about partnership [here](https://lmsys.org/donations/).
|
611 |
+
|
612 |
+
<div class="image-about">
|
613 |
+
<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/7/7c/Kaggle_logo.png/400px-Kaggle_logo.png" alt="Image 1">
|
614 |
+
<img src="https://upload.wikimedia.org/wikipedia/en/5/55/Mohamed_bin_Zayed_University_of_Artificial_Intelligence_logo.png" alt="Image 2">
|
615 |
+
<img src="https://docs.anyscale.com/site-assets/logo.png" alt="Image 3">
|
616 |
+
<img src="https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png" alt="Image 4">
|
617 |
+
</div>
|
618 |
+
"""
|
619 |
+
|
620 |
+
# state = gr.State()
|
621 |
+
gr.Markdown(about_markdown, elem_id="about_markdown")
|
622 |
+
|
623 |
+
# return [state]
|
624 |
+
|
625 |
+
|
626 |
+
def build_single_model_ui(models, add_promotion_links=False):
|
627 |
+
promotion = (
|
628 |
+
"""
|
629 |
+
- | [GitHub](https://github.com/lm-sys/FastChat) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
|
630 |
+
- Introducing Llama 2: The Next Generation Open Source Large Language Model. [[Website]](https://ai.meta.com/llama/)
|
631 |
+
- Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90% ChatGPT Quality. [[Blog]](https://lmsys.org/blog/2023-03-30-vicuna/)
|
632 |
+
"""
|
633 |
+
if add_promotion_links
|
634 |
+
else ""
|
635 |
+
)
|
636 |
+
|
637 |
+
notice_markdown = f"""
|
638 |
+
# 🏔️ Chat with Open Large Language Models
|
639 |
+
{promotion}
|
640 |
+
|
641 |
+
## 👉 Choose any model to chat
|
642 |
+
"""
|
643 |
+
|
644 |
+
state = gr.State()
|
645 |
+
model_description_md = get_model_description_md(models)
|
646 |
+
gr.Markdown(notice_markdown + model_description_md, elem_id="notice_markdown")
|
647 |
+
|
648 |
+
with gr.Row(elem_id="model_selector_row"):
|
649 |
+
model_selector = gr.Dropdown(
|
650 |
+
choices=models,
|
651 |
+
value=models[0] if len(models) > 0 else "",
|
652 |
+
interactive=True,
|
653 |
+
show_label=False,
|
654 |
+
container=False,
|
655 |
+
)
|
656 |
+
|
657 |
+
chatbot = gr.Chatbot(
|
658 |
+
elem_id="chatbot",
|
659 |
+
label="Scroll down and start chatting",
|
660 |
+
height=550,
|
661 |
+
)
|
662 |
+
with gr.Row():
|
663 |
+
with gr.Column(scale=20):
|
664 |
+
textbox = gr.Textbox(
|
665 |
+
show_label=False,
|
666 |
+
placeholder="Enter your prompt here and press ENTER",
|
667 |
+
container=False,
|
668 |
+
elem_id="input_box",
|
669 |
+
)
|
670 |
+
with gr.Column(scale=1, min_width=50):
|
671 |
+
send_btn = gr.Button(value="Send", variant="primary")
|
672 |
+
|
673 |
+
with gr.Row() as button_row:
|
674 |
+
upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
|
675 |
+
downvote_btn = gr.Button(value="👎 Downvote", interactive=False)
|
676 |
+
flag_btn = gr.Button(value="⚠️ Flag", interactive=False)
|
677 |
+
regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
|
678 |
+
clear_btn = gr.Button(value="🗑️ Clear history", interactive=False)
|
679 |
+
|
680 |
+
with gr.Accordion("Parameters", open=False) as parameter_row:
|
681 |
+
temperature = gr.Slider(
|
682 |
+
minimum=0.0,
|
683 |
+
maximum=1.0,
|
684 |
+
value=0.7,
|
685 |
+
step=0.1,
|
686 |
+
interactive=True,
|
687 |
+
label="Temperature",
|
688 |
+
)
|
689 |
+
top_p = gr.Slider(
|
690 |
+
minimum=0.0,
|
691 |
+
maximum=1.0,
|
692 |
+
value=1.0,
|
693 |
+
step=0.1,
|
694 |
+
interactive=True,
|
695 |
+
label="Top P",
|
696 |
+
)
|
697 |
+
max_output_tokens = gr.Slider(
|
698 |
+
minimum=16,
|
699 |
+
maximum=3072,
|
700 |
+
value=2048,
|
701 |
+
step=1,
|
702 |
+
interactive=True,
|
703 |
+
label="Max output tokens",
|
704 |
+
)
|
705 |
+
|
706 |
+
if add_promotion_links:
|
707 |
+
gr.Markdown(acknowledgment_md)
|
708 |
+
|
709 |
+
# Register listeners
|
710 |
+
btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
|
711 |
+
upvote_btn.click(
|
712 |
+
upvote_last_response,
|
713 |
+
[state, model_selector],
|
714 |
+
[textbox, upvote_btn, downvote_btn, flag_btn],
|
715 |
+
)
|
716 |
+
downvote_btn.click(
|
717 |
+
downvote_last_response,
|
718 |
+
[state, model_selector],
|
719 |
+
[textbox, upvote_btn, downvote_btn, flag_btn],
|
720 |
+
)
|
721 |
+
flag_btn.click(
|
722 |
+
flag_last_response,
|
723 |
+
[state, model_selector],
|
724 |
+
[textbox, upvote_btn, downvote_btn, flag_btn],
|
725 |
+
)
|
726 |
+
regenerate_btn.click(regenerate, state, [state, chatbot, textbox] + btn_list).then(
|
727 |
+
bot_response,
|
728 |
+
[state, temperature, top_p, max_output_tokens],
|
729 |
+
[state, chatbot] + btn_list,
|
730 |
+
)
|
731 |
+
clear_btn.click(clear_history, None, [state, chatbot, textbox] + btn_list)
|
732 |
+
|
733 |
+
model_selector.change(clear_history, None, [state, chatbot, textbox] + btn_list)
|
734 |
+
|
735 |
+
textbox.submit(
|
736 |
+
add_text, [state, model_selector, textbox], [state, chatbot, textbox] + btn_list
|
737 |
+
).then(
|
738 |
+
bot_response,
|
739 |
+
[state, temperature, top_p, max_output_tokens],
|
740 |
+
[state, chatbot] + btn_list,
|
741 |
+
)
|
742 |
+
send_btn.click(
|
743 |
+
add_text,
|
744 |
+
[state, model_selector, textbox],
|
745 |
+
[state, chatbot, textbox] + btn_list,
|
746 |
+
).then(
|
747 |
+
bot_response,
|
748 |
+
[state, temperature, top_p, max_output_tokens],
|
749 |
+
[state, chatbot] + btn_list,
|
750 |
+
)
|
751 |
+
|
752 |
+
return [state, model_selector]
|
753 |
+
|
754 |
+
|
755 |
+
def build_demo(models):
|
756 |
+
with gr.Blocks(
|
757 |
+
title="Chat with Open Large Language Models",
|
758 |
+
theme=gr.themes.Default(),
|
759 |
+
css=block_css,
|
760 |
+
) as demo:
|
761 |
+
url_params = gr.JSON(visible=False)
|
762 |
+
|
763 |
+
state, model_selector = build_single_model_ui(models)
|
764 |
+
|
765 |
+
if args.model_list_mode not in ["once", "reload"]:
|
766 |
+
raise ValueError(f"Unknown model list mode: {args.model_list_mode}")
|
767 |
+
|
768 |
+
if args.show_terms_of_use:
|
769 |
+
load_js = get_window_url_params_with_tos_js
|
770 |
+
else:
|
771 |
+
load_js = get_window_url_params_js
|
772 |
+
|
773 |
+
demo.load(
|
774 |
+
load_demo,
|
775 |
+
[url_params],
|
776 |
+
[
|
777 |
+
state,
|
778 |
+
model_selector,
|
779 |
+
],
|
780 |
+
_js=load_js,
|
781 |
+
)
|
782 |
+
|
783 |
+
return demo
|
784 |
+
|
785 |
+
|
786 |
+
if __name__ == "__main__":
|
787 |
+
parser = argparse.ArgumentParser()
|
788 |
+
parser.add_argument("--host", type=str, default="0.0.0.0")
|
789 |
+
parser.add_argument("--port", type=int)
|
790 |
+
parser.add_argument(
|
791 |
+
"--conv-template",
|
792 |
+
type=str,
|
793 |
+
default="megrez",
|
794 |
+
help="The address of the controller",
|
795 |
+
)
|
796 |
+
parser.add_argument(
|
797 |
+
"--share",
|
798 |
+
action="store_true",
|
799 |
+
help="Whether to generate a public, shareable link",
|
800 |
+
)
|
801 |
+
parser.add_argument(
|
802 |
+
"--controller-url",
|
803 |
+
type=str,
|
804 |
+
default="http://localhost:21001",
|
805 |
+
help="The address of the controller",
|
806 |
+
)
|
807 |
+
parser.add_argument(
|
808 |
+
"--concurrency-count",
|
809 |
+
type=int,
|
810 |
+
default=10,
|
811 |
+
help="The concurrency count of the gradio queue",
|
812 |
+
)
|
813 |
+
parser.add_argument(
|
814 |
+
"--model-list-mode",
|
815 |
+
type=str,
|
816 |
+
default="once",
|
817 |
+
choices=["once", "reload"],
|
818 |
+
help="Whether to load the model list once or reload the model list every time",
|
819 |
+
)
|
820 |
+
parser.add_argument(
|
821 |
+
"--moderate",
|
822 |
+
action="store_true",
|
823 |
+
help="Enable content moderation to block unsafe inputs",
|
824 |
+
)
|
825 |
+
parser.add_argument(
|
826 |
+
"--show-terms-of-use",
|
827 |
+
action="store_true",
|
828 |
+
help="Shows term of use before loading the demo",
|
829 |
+
)
|
830 |
+
parser.add_argument(
|
831 |
+
"--add-chatgpt",
|
832 |
+
action="store_true",
|
833 |
+
help="Add OpenAI's ChatGPT models (gpt-3.5-turbo, gpt-4)",
|
834 |
+
)
|
835 |
+
parser.add_argument(
|
836 |
+
"--add-claude",
|
837 |
+
action="store_true",
|
838 |
+
help="Add Anthropic's Claude models (claude-2, claude-instant-1)",
|
839 |
+
)
|
840 |
+
parser.add_argument(
|
841 |
+
"--add-palm",
|
842 |
+
action="store_true",
|
843 |
+
help="Add Google's PaLM model (PaLM 2 for Chat: chat-bison@001)",
|
844 |
+
)
|
845 |
+
parser.add_argument(
|
846 |
+
"--register-openai-compatible-models",
|
847 |
+
type=str,
|
848 |
+
help="Register custom OpenAI API compatible models by loading them from a JSON file",
|
849 |
+
)
|
850 |
+
parser.add_argument(
|
851 |
+
"--gradio-auth-path",
|
852 |
+
type=str,
|
853 |
+
help='Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"',
|
854 |
+
)
|
855 |
+
args = parser.parse_args()
|
856 |
+
logger.info(f"args: {args}")
|
857 |
+
CONV_TEMPLATE = args.conv_template
|
858 |
+
# Set global variables
|
859 |
+
set_global_vars(args.controller_url, args.moderate)
|
860 |
+
models = get_model_list(
|
861 |
+
args.controller_url,
|
862 |
+
args.register_openai_compatible_models,
|
863 |
+
args.add_chatgpt,
|
864 |
+
args.add_claude,
|
865 |
+
args.add_palm,
|
866 |
+
)
|
867 |
+
# Set authorization credentials
|
868 |
+
auth = None
|
869 |
+
if args.gradio_auth_path is not None:
|
870 |
+
auth = parse_gradio_auth_creds(args.gradio_auth_path)
|
871 |
+
|
872 |
+
# Launch the demo
|
873 |
+
demo = build_demo(models)
|
874 |
+
ret = demo.queue(
|
875 |
+
concurrency_count=args.concurrency_count, status_update_rate=10, api_open=False
|
876 |
+
).launch(
|
877 |
+
server_name=args.host,
|
878 |
+
server_port=args.port,
|
879 |
+
share=args.share,
|
880 |
+
max_threads=200,
|
881 |
+
auth=auth,
|
882 |
+
)
|
883 |
+
from IPython import embed;embed()
|
gradio_web_server_multi.py
ADDED
@@ -0,0 +1,270 @@
1 |
+
"""
|
2 |
+
The gradio demo server with multiple tabs.
|
3 |
+
It supports chatting with a single model or chatting with two models side-by-side.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import argparse
|
7 |
+
import pickle
|
8 |
+
import time
|
9 |
+
|
10 |
+
import gradio as gr
|
11 |
+
|
12 |
+
from fastchat.constants import (
|
13 |
+
SESSION_EXPIRATION_TIME,
|
14 |
+
)
|
15 |
+
from fastchat.serve.gradio_block_arena_anony import (
|
16 |
+
build_side_by_side_ui_anony,
|
17 |
+
load_demo_side_by_side_anony,
|
18 |
+
set_global_vars_anony,
|
19 |
+
)
|
20 |
+
from fastchat.serve.gradio_block_arena_named import (
|
21 |
+
build_side_by_side_ui_named,
|
22 |
+
load_demo_side_by_side_named,
|
23 |
+
set_global_vars_named,
|
24 |
+
)
|
25 |
+
from fastchat.serve.gradio_web_server import (
|
26 |
+
set_global_vars,
|
27 |
+
block_css,
|
28 |
+
build_single_model_ui,
|
29 |
+
build_about,
|
30 |
+
get_model_list,
|
31 |
+
load_demo_single,
|
32 |
+
ip_expiration_dict,
|
33 |
+
get_ip,
|
34 |
+
)
|
35 |
+
from fastchat.serve.monitor.monitor import build_leaderboard_tab
|
36 |
+
from fastchat.utils import (
|
37 |
+
build_logger,
|
38 |
+
get_window_url_params_js,
|
39 |
+
get_window_url_params_with_tos_js,
|
40 |
+
parse_gradio_auth_creds,
|
41 |
+
)
|
42 |
+
|
43 |
+
logger = build_logger("gradio_web_server_multi", "gradio_web_server_multi.log")
|
44 |
+
|
45 |
+
|
46 |
+
def load_demo(url_params, request: gr.Request):
|
47 |
+
global models
|
48 |
+
|
49 |
+
ip = get_ip(request)
|
50 |
+
logger.info(f"load_demo. ip: {ip}. params: {url_params}")
|
51 |
+
ip_expiration_dict[ip] = time.time() + SESSION_EXPIRATION_TIME
|
52 |
+
|
53 |
+
selected = 0
|
54 |
+
if "arena" in url_params:
|
55 |
+
selected = 0
|
56 |
+
elif "compare" in url_params:
|
57 |
+
selected = 1
|
58 |
+
elif "single" in url_params:
|
59 |
+
selected = 2
|
60 |
+
elif "leaderboard" in url_params:
|
61 |
+
selected = 3
|
62 |
+
|
63 |
+
if args.model_list_mode == "reload":
|
64 |
+
if args.anony_only_for_proprietary_model:
|
65 |
+
models = get_model_list(
|
66 |
+
args.controller_url,
|
67 |
+
args.register_openai_compatible_models,
|
68 |
+
False,
|
69 |
+
False,
|
70 |
+
False,
|
71 |
+
)
|
72 |
+
else:
|
73 |
+
models = get_model_list(
|
74 |
+
args.controller_url,
|
75 |
+
args.register_openai_compatible_models,
|
76 |
+
args.add_chatgpt,
|
77 |
+
args.add_claude,
|
78 |
+
args.add_palm,
|
79 |
+
)
|
80 |
+
|
81 |
+
single_updates = load_demo_single(models, url_params)
|
82 |
+
|
83 |
+
models_anony = list(models)
|
84 |
+
if args.anony_only_for_proprietary_model:
|
85 |
+
# Only enable these models in anony battles.
|
86 |
+
if args.add_chatgpt:
|
87 |
+
models_anony += [
|
88 |
+
"gpt-4",
|
89 |
+
"gpt-3.5-turbo",
|
90 |
+
"gpt-4-turbo",
|
91 |
+
"gpt-3.5-turbo-1106",
|
92 |
+
]
|
93 |
+
if args.add_claude:
|
94 |
+
models_anony += ["claude-2", "claude-1", "claude-instant-1"]
|
95 |
+
if args.add_palm:
|
96 |
+
models_anony += ["palm-2"]
|
97 |
+
models_anony = list(set(models_anony))
|
98 |
+
|
99 |
+
side_by_side_anony_updates = load_demo_side_by_side_anony(models_anony, url_params)
|
100 |
+
side_by_side_named_updates = load_demo_side_by_side_named(models, url_params)
|
101 |
+
return (
|
102 |
+
(gr.Tabs.update(selected=selected),)
|
103 |
+
+ single_updates
|
104 |
+
+ side_by_side_anony_updates
|
105 |
+
+ side_by_side_named_updates
|
106 |
+
)
|
107 |
+
|
108 |
+
|
109 |
+
def build_demo(models, elo_results_file, leaderboard_table_file):
|
110 |
+
text_size = gr.themes.sizes.text_md
|
111 |
+
with gr.Blocks(
|
112 |
+
title="Chat with Open Large Language Models",
|
113 |
+
theme=gr.themes.Default(text_size=text_size),
|
114 |
+
css=block_css,
|
115 |
+
) as demo:
|
116 |
+
with gr.Tabs() as tabs:
|
117 |
+
with gr.Tab("Arena (battle)", id=0):
|
118 |
+
side_by_side_anony_list = build_side_by_side_ui_anony(models)
|
119 |
+
|
120 |
+
with gr.Tab("Arena (side-by-side)", id=1):
|
121 |
+
side_by_side_named_list = build_side_by_side_ui_named(models)
|
122 |
+
|
123 |
+
with gr.Tab("Direct Chat", id=2):
|
124 |
+
single_model_list = build_single_model_ui(
|
125 |
+
models, add_promotion_links=True
|
126 |
+
)
|
127 |
+
if elo_results_file:
|
128 |
+
with gr.Tab("Leaderboard", id=3):
|
129 |
+
build_leaderboard_tab(elo_results_file, leaderboard_table_file)
|
130 |
+
with gr.Tab("About Us", id=4):
|
131 |
+
about = build_about()
|
132 |
+
|
133 |
+
url_params = gr.JSON(visible=False)
|
134 |
+
|
135 |
+
if args.model_list_mode not in ["once", "reload"]:
|
136 |
+
raise ValueError(f"Unknown model list mode: {args.model_list_mode}")
|
137 |
+
|
138 |
+
if args.show_terms_of_use:
|
139 |
+
load_js = get_window_url_params_with_tos_js
|
140 |
+
else:
|
141 |
+
load_js = get_window_url_params_js
|
142 |
+
|
143 |
+
demo.load(
|
144 |
+
load_demo,
|
145 |
+
[url_params],
|
146 |
+
[tabs]
|
147 |
+
+ single_model_list
|
148 |
+
+ side_by_side_anony_list
|
149 |
+
+ side_by_side_named_list,
|
150 |
+
_js=load_js,
|
151 |
+
)
|
152 |
+
|
153 |
+
return demo
|
154 |
+
|
155 |
+
|
156 |
+
if __name__ == "__main__":
|
157 |
+
parser = argparse.ArgumentParser()
|
158 |
+
parser.add_argument("--host", type=str, default="0.0.0.0")
|
159 |
+
parser.add_argument("--port", type=int)
|
160 |
+
parser.add_argument(
|
161 |
+
"--share",
|
162 |
+
action="store_true",
|
163 |
+
help="Whether to generate a public, shareable link",
|
164 |
+
)
|
165 |
+
parser.add_argument(
|
166 |
+
"--controller-url",
|
167 |
+
type=str,
|
168 |
+
default="http://localhost:21001",
|
169 |
+
help="The address of the controller",
|
170 |
+
)
|
171 |
+
parser.add_argument(
|
172 |
+
"--concurrency-count",
|
173 |
+
type=int,
|
174 |
+
default=10,
|
175 |
+
help="The concurrency count of the gradio queue",
|
176 |
+
)
|
177 |
+
parser.add_argument(
|
178 |
+
"--model-list-mode",
|
179 |
+
type=str,
|
180 |
+
default="once",
|
181 |
+
choices=["once", "reload"],
|
182 |
+
help="Whether to load the model list once or reload the model list every time.",
|
183 |
+
)
|
184 |
+
parser.add_argument(
|
185 |
+
"--moderate",
|
186 |
+
action="store_true",
|
187 |
+
help="Enable content moderation to block unsafe inputs",
|
188 |
+
)
|
189 |
+
parser.add_argument(
|
190 |
+
"--show-terms-of-use",
|
191 |
+
action="store_true",
|
192 |
+
help="Shows term of use before loading the demo",
|
193 |
+
)
|
194 |
+
parser.add_argument(
|
195 |
+
"--add-chatgpt",
|
196 |
+
action="store_true",
|
197 |
+
help="Add OpenAI's ChatGPT models (gpt-3.5-turbo, gpt-4)",
|
198 |
+
)
|
199 |
+
parser.add_argument(
|
200 |
+
"--add-claude",
|
201 |
+
action="store_true",
|
202 |
+
help="Add Anthropic's Claude models (claude-2, claude-instant-1)",
|
203 |
+
)
|
204 |
+
parser.add_argument(
|
205 |
+
"--add-palm",
|
206 |
+
action="store_true",
|
207 |
+
help="Add Google's PaLM model (PaLM 2 for Chat: chat-bison@001)",
|
208 |
+
)
|
209 |
+
parser.add_argument(
|
210 |
+
"--anony-only-for-proprietary-model",
|
211 |
+
action="store_true",
|
212 |
+
help="Only add ChatGPT, Claude, Bard under anony battle tab",
|
213 |
+
)
|
214 |
+
parser.add_argument(
|
215 |
+
"--register-openai-compatible-models",
|
216 |
+
type=str,
|
217 |
+
help="Register custom OpenAI API compatible models by loading them from a JSON file",
|
218 |
+
)
|
219 |
+
parser.add_argument(
|
220 |
+
"--gradio-auth-path",
|
221 |
+
type=str,
|
222 |
+
help='Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"',
|
223 |
+
default=None,
|
224 |
+
)
|
225 |
+
parser.add_argument(
|
226 |
+
"--elo-results-file", type=str, help="Load leaderboard results and plots"
|
227 |
+
)
|
228 |
+
parser.add_argument(
|
229 |
+
"--leaderboard-table-file", type=str, help="Load leaderboard results and plots"
|
230 |
+
)
|
231 |
+
args = parser.parse_args()
|
232 |
+
logger.info(f"args: {args}")
|
233 |
+
|
234 |
+
# Set global variables
|
235 |
+
set_global_vars(args.controller_url, args.moderate)
|
236 |
+
set_global_vars_named(args.moderate)
|
237 |
+
set_global_vars_anony(args.moderate)
|
238 |
+
if args.anony_only_for_proprietary_model:
|
239 |
+
models = get_model_list(
|
240 |
+
args.controller_url,
|
241 |
+
args.register_openai_compatible_models,
|
242 |
+
False,
|
243 |
+
False,
|
244 |
+
False,
|
245 |
+
)
|
246 |
+
else:
|
247 |
+
models = get_model_list(
|
248 |
+
args.controller_url,
|
249 |
+
args.register_openai_compatible_models,
|
250 |
+
args.add_chatgpt,
|
251 |
+
args.add_claude,
|
252 |
+
args.add_palm,
|
253 |
+
)
|
254 |
+
|
255 |
+
# Set authorization credentials
|
256 |
+
auth = None
|
257 |
+
if args.gradio_auth_path is not None:
|
258 |
+
auth = parse_gradio_auth_creds(args.gradio_auth_path)
|
259 |
+
|
260 |
+
# Launch the demo
|
261 |
+
demo = build_demo(models, args.elo_results_file, args.leaderboard_table_file)
|
262 |
+
demo.queue(
|
263 |
+
concurrency_count=args.concurrency_count, status_update_rate=10, api_open=False
|
264 |
+
).launch(
|
265 |
+
server_name=args.host,
|
266 |
+
server_port=args.port,
|
267 |
+
share=args.share,
|
268 |
+
max_threads=200,
|
269 |
+
auth=auth,
|
270 |
+
)
|
huggingface_api.py
ADDED
@@ -0,0 +1,73 @@
+"""
+Use FastChat with Hugging Face generation APIs.
+
+Usage:
+python3 -m fastchat.serve.huggingface_api --model lmsys/vicuna-7b-v1.5
+python3 -m fastchat.serve.huggingface_api --model lmsys/fastchat-t5-3b-v1.0
+"""
+import argparse
+
+import torch
+
+from fastchat.model import load_model, get_conversation_template, add_model_args
+
+
+@torch.inference_mode()
+def main(args):
+    # Load model
+    model, tokenizer = load_model(
+        args.model_path,
+        device=args.device,
+        num_gpus=args.num_gpus,
+        max_gpu_memory=args.max_gpu_memory,
+        load_8bit=args.load_8bit,
+        cpu_offloading=args.cpu_offloading,
+        revision=args.revision,
+        debug=args.debug,
+    )
+
+    # Build the prompt with a conversation template
+    msg = args.message
+    conv = get_conversation_template(args.model_path)
+    conv.append_message(conv.roles[0], msg)
+    conv.append_message(conv.roles[1], None)
+    prompt = conv.get_prompt()
+
+    # Run inference
+    inputs = tokenizer([prompt], return_tensors="pt").to(args.device)
+    output_ids = model.generate(
+        **inputs,
+        do_sample=True if args.temperature > 1e-5 else False,
+        temperature=args.temperature,
+        repetition_penalty=args.repetition_penalty,
+        max_new_tokens=args.max_new_tokens,
+    )
+
+    if model.config.is_encoder_decoder:
+        output_ids = output_ids[0]
+    else:
+        output_ids = output_ids[0][len(inputs["input_ids"][0]) :]
+    outputs = tokenizer.decode(
+        output_ids, skip_special_tokens=True, spaces_between_special_tokens=False
+    )
+
+    # Print results
+    print(f"{conv.roles[0]}: {msg}")
+    print(f"{conv.roles[1]}: {outputs}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    add_model_args(parser)
+    parser.add_argument("--temperature", type=float, default=0.7)
+    parser.add_argument("--repetition_penalty", type=float, default=1.0)
+    parser.add_argument("--max-new-tokens", type=int, default=512)
+    parser.add_argument("--debug", action="store_true")
+    parser.add_argument("--message", type=str, default="Hello! Who are you?")
+    args = parser.parse_args()
+
+    # Reset default repetition penalty for T5 models.
+    if "t5" in args.model_path and args.repetition_penalty == 1.0:
+        args.repetition_penalty = 1.2
+
+    main(args)
huggingface_api_worker.py
ADDED
@@ -0,0 +1,391 @@
1 |
+
"""
|
2 |
+
A model worker that calls huggingface inference endpoint.
|
3 |
+
|
4 |
+
Register models in a JSON file with the following format:
|
5 |
+
{
|
6 |
+
"falcon-180b-chat": {
|
7 |
+
"model_path": "tiiuae/falcon-180B-chat",
|
8 |
+
"api_base": "https://api-inference.huggingface.co/models",
|
9 |
+
"token": "hf_xxx",
|
10 |
+
"context_length": 2048,
|
11 |
+
"model_names": "falcon-180b-chat",
|
12 |
+
"conv_template": null
|
13 |
+
}
|
14 |
+
}
|
15 |
+
|
16 |
+
"model_path", "api_base", "token", and "context_length" are necessary, while others are optional.
|
17 |
+
"""
|
18 |
+
import argparse
|
19 |
+
import asyncio
|
20 |
+
import json
|
21 |
+
import uuid
|
22 |
+
from typing import List, Optional
|
23 |
+
|
24 |
+
import requests
|
25 |
+
import uvicorn
|
26 |
+
from fastapi import BackgroundTasks, FastAPI, Request
|
27 |
+
from fastapi.responses import JSONResponse, StreamingResponse
|
28 |
+
from huggingface_hub import InferenceClient
|
29 |
+
|
30 |
+
from fastchat.constants import SERVER_ERROR_MSG, ErrorCode
|
31 |
+
from fastchat.serve.base_model_worker import BaseModelWorker
|
32 |
+
from fastchat.utils import build_logger
|
33 |
+
|
34 |
+
worker_id = str(uuid.uuid4())[:8]
|
35 |
+
logger = build_logger("model_worker", f"model_worker_{worker_id}.log")
|
36 |
+
|
37 |
+
workers = []
|
38 |
+
worker_map = {}
|
39 |
+
app = FastAPI()
|
40 |
+
|
41 |
+
|
42 |
+
# reference to
|
43 |
+
# https://github.com/philschmid/easyllm/blob/cbd908b3b3f44a97a22cb0fc2c93df3660bacdad/easyllm/clients/huggingface.py#L374-L392
|
44 |
+
def get_gen_kwargs(
|
45 |
+
params,
|
46 |
+
seed: Optional[int] = None,
|
47 |
+
):
|
48 |
+
stop = params.get("stop", None)
|
49 |
+
if isinstance(stop, list):
|
50 |
+
stop_sequences = stop
|
51 |
+
elif isinstance(stop, str):
|
52 |
+
stop_sequences = [stop]
|
53 |
+
else:
|
54 |
+
stop_sequences = []
|
55 |
+
gen_kwargs = {
|
56 |
+
"do_sample": True,
|
57 |
+
"return_full_text": bool(params.get("echo", False)),
|
58 |
+
"max_new_tokens": int(params.get("max_new_tokens", 256)),
|
59 |
+
"top_p": float(params.get("top_p", 1.0)),
|
60 |
+
"temperature": float(params.get("temperature", 1.0)),
|
61 |
+
"stop_sequences": stop_sequences,
|
62 |
+
"repetition_penalty": float(params.get("repetition_penalty", 1.0)),
|
63 |
+
"top_k": params.get("top_k", None),
|
64 |
+
"seed": seed,
|
65 |
+
}
|
66 |
+
if gen_kwargs["top_p"] == 1:
|
67 |
+
gen_kwargs["top_p"] = 0.9999999
|
68 |
+
if gen_kwargs["top_p"] == 0:
|
69 |
+
gen_kwargs.pop("top_p")
|
70 |
+
if gen_kwargs["temperature"] == 0:
|
71 |
+
gen_kwargs.pop("temperature")
|
72 |
+
gen_kwargs["do_sample"] = False
|
73 |
+
return gen_kwargs
|
74 |
+
|
75 |
+
|
76 |
+
def could_be_stop(text, stop):
|
77 |
+
for s in stop:
|
78 |
+
if any(text.endswith(s[:i]) for i in range(1, len(s) + 1)):
|
79 |
+
return True
|
80 |
+
return False
|
81 |
+
|
82 |
+
|
83 |
+
class HuggingfaceApiWorker(BaseModelWorker):
|
84 |
+
def __init__(
|
85 |
+
self,
|
86 |
+
controller_addr: str,
|
87 |
+
worker_addr: str,
|
88 |
+
worker_id: str,
|
89 |
+
model_path: str,
|
90 |
+
api_base: str,
|
91 |
+
token: str,
|
92 |
+
context_length: int,
|
93 |
+
model_names: List[str],
|
94 |
+
limit_worker_concurrency: int,
|
95 |
+
no_register: bool,
|
96 |
+
conv_template: Optional[str] = None,
|
97 |
+
seed: Optional[int] = None,
|
98 |
+
**kwargs,
|
99 |
+
):
|
100 |
+
super().__init__(
|
101 |
+
controller_addr,
|
102 |
+
worker_addr,
|
103 |
+
worker_id,
|
104 |
+
model_path,
|
105 |
+
model_names,
|
106 |
+
limit_worker_concurrency,
|
107 |
+
conv_template=conv_template,
|
108 |
+
)
|
109 |
+
|
110 |
+
self.model_path = model_path
|
111 |
+
self.api_base = api_base
|
112 |
+
self.token = token
|
113 |
+
self.context_len = context_length
|
114 |
+
self.seed = seed
|
115 |
+
|
116 |
+
logger.info(
|
117 |
+
f"Connecting with huggingface api {self.model_path} as {self.model_names} on worker {worker_id} ..."
|
118 |
+
)
|
119 |
+
|
120 |
+
if not no_register:
|
121 |
+
self.init_heart_beat()
|
122 |
+
|
123 |
+
def count_token(self, params):
|
124 |
+
# No tokenizer here
|
125 |
+
ret = {
|
126 |
+
"count": 0,
|
127 |
+
"error_code": 0,
|
128 |
+
}
|
129 |
+
return ret
|
130 |
+
|
131 |
+
def generate_stream_gate(self, params):
|
132 |
+
self.call_ct += 1
|
133 |
+
|
134 |
+
prompt = params["prompt"]
|
135 |
+
gen_kwargs = get_gen_kwargs(params, seed=self.seed)
|
136 |
+
stop = gen_kwargs["stop_sequences"]
|
137 |
+
if "falcon" in self.model_path and "chat" in self.model_path:
|
138 |
+
stop.extend(["\nUser:", "<|endoftext|>", " User:", "###"])
|
139 |
+
stop = list(set(stop))
|
140 |
+
gen_kwargs["stop_sequences"] = stop
|
141 |
+
|
142 |
+
logger.info(f"prompt: {prompt}")
|
143 |
+
logger.info(f"gen_kwargs: {gen_kwargs}")
|
144 |
+
|
145 |
+
try:
|
146 |
+
if self.model_path == "":
|
147 |
+
url = f"{self.api_base}"
|
148 |
+
else:
|
149 |
+
url = f"{self.api_base}/{self.model_path}"
|
150 |
+
client = InferenceClient(url, token=self.token)
|
151 |
+
res = client.text_generation(
|
152 |
+
prompt, stream=True, details=True, **gen_kwargs
|
153 |
+
)
|
154 |
+
|
155 |
+
reason = None
|
156 |
+
text = ""
|
157 |
+
for chunk in res:
|
158 |
+
if chunk.token.special:
|
159 |
+
continue
|
160 |
+
text += chunk.token.text
|
161 |
+
|
162 |
+
s = next((x for x in stop if text.endswith(x)), None)
|
163 |
+
if s is not None:
|
164 |
+
text = text[: -len(s)]
|
165 |
+
reason = "stop"
|
166 |
+
break
|
167 |
+
if could_be_stop(text, stop):
|
168 |
+
continue
|
169 |
+
if (
|
170 |
+
chunk.details is not None
|
171 |
+
and chunk.details.finish_reason is not None
|
172 |
+
):
|
173 |
+
reason = chunk.details.finish_reason
|
174 |
+
if reason not in ["stop", "length"]:
|
175 |
+
reason = None
|
176 |
+
ret = {
|
177 |
+
"text": text,
|
178 |
+
"error_code": 0,
|
179 |
+
"finish_reason": reason,
|
180 |
+
}
|
181 |
+
yield json.dumps(ret).encode() + b"\0"
|
182 |
+
except Exception as e:
|
183 |
+
ret = {
|
184 |
+
"text": f"{SERVER_ERROR_MSG}\n\n({e})",
|
185 |
+
"error_code": ErrorCode.INTERNAL_ERROR,
|
186 |
+
}
|
187 |
+
yield json.dumps(ret).encode() + b"\0"
|
188 |
+
|
189 |
+
def generate_gate(self, params):
|
190 |
+
for x in self.generate_stream_gate(params):
|
191 |
+
pass
|
192 |
+
return json.loads(x[:-1].decode())
|
193 |
+
|
194 |
+
def get_embeddings(self, params):
|
195 |
+
raise NotImplementedError()
|
196 |
+
|
197 |
+
|
198 |
+
def release_worker_semaphore(worker):
|
199 |
+
worker.semaphore.release()
|
200 |
+
|
201 |
+
|
202 |
+
def acquire_worker_semaphore(worker):
|
203 |
+
if worker.semaphore is None:
|
204 |
+
worker.semaphore = asyncio.Semaphore(worker.limit_worker_concurrency)
|
205 |
+
return worker.semaphore.acquire()
|
206 |
+
|
207 |
+
|
208 |
+
def create_background_tasks(worker):
|
209 |
+
background_tasks = BackgroundTasks()
|
210 |
+
background_tasks.add_task(lambda: release_worker_semaphore(worker))
|
211 |
+
return background_tasks
|
212 |
+
|
213 |
+
|
214 |
+
@app.post("/worker_generate_stream")
|
215 |
+
async def api_generate_stream(request: Request):
|
216 |
+
params = await request.json()
|
217 |
+
worker = worker_map[params["model"]]
|
218 |
+
await acquire_worker_semaphore(worker)
|
219 |
+
generator = worker.generate_stream_gate(params)
|
220 |
+
background_tasks = create_background_tasks(worker)
|
221 |
+
return StreamingResponse(generator, background=background_tasks)
|
222 |
+
|
223 |
+
|
224 |
+
@app.post("/worker_generate")
|
225 |
+
async def api_generate(request: Request):
|
226 |
+
params = await request.json()
|
227 |
+
worker = worker_map[params["model"]]
|
228 |
+
await acquire_worker_semaphore(worker)
|
229 |
+
output = worker.generate_gate(params)
|
230 |
+
release_worker_semaphore(worker)
|
231 |
+
return JSONResponse(output)
|
232 |
+
|
233 |
+
|
234 |
+
@app.post("/worker_get_embeddings")
|
235 |
+
async def api_get_embeddings(request: Request):
|
236 |
+
params = await request.json()
|
237 |
+
worker = worker_map[params["model"]]
|
238 |
+
await acquire_worker_semaphore(worker)
|
239 |
+
embedding = worker.get_embeddings(params)
|
240 |
+
release_worker_semaphore(worker)
|
241 |
+
return JSONResponse(content=embedding)
|
242 |
+
|
243 |
+
|
244 |
+
@app.post("/worker_get_status")
|
245 |
+
async def api_get_status(request: Request):
|
246 |
+
return {
|
247 |
+
"model_names": [m for w in workers for m in w.model_names],
|
248 |
+
"speed": 1,
|
249 |
+
"queue_length": sum([w.get_queue_length() for w in workers]),
|
250 |
+
}
|
251 |
+
|
252 |
+
|
253 |
+
@app.post("/count_token")
|
254 |
+
async def api_count_token(request: Request):
|
255 |
+
params = await request.json()
|
256 |
+
worker = worker_map[params["model"]]
|
257 |
+
return worker.count_token(params)
|
258 |
+
|
259 |
+
|
260 |
+
@app.post("/worker_get_conv_template")
|
261 |
+
async def api_get_conv(request: Request):
|
262 |
+
params = await request.json()
|
263 |
+
worker = worker_map[params["model"]]
|
264 |
+
return worker.get_conv_template()
|
265 |
+
|
266 |
+
|
267 |
+
@app.post("/model_details")
|
268 |
+
async def api_model_details(request: Request):
|
269 |
+
params = await request.json()
|
270 |
+
worker = worker_map[params["model"]]
|
271 |
+
return {"context_length": worker.context_len}
|
272 |
+
|
273 |
+
|
274 |
+
def create_huggingface_api_worker():
|
275 |
+
parser = argparse.ArgumentParser()
|
276 |
+
parser.add_argument("--host", type=str, default="localhost")
|
277 |
+
parser.add_argument("--port", type=int, default=21002)
|
278 |
+
parser.add_argument("--worker-address", type=str, default="http://localhost:21002")
|
279 |
+
parser.add_argument(
|
280 |
+
"--controller-address", type=str, default="http://localhost:21001"
|
281 |
+
)
|
282 |
+
# all model-related parameters are listed in --model-info-file
|
283 |
+
parser.add_argument(
|
284 |
+
"--model-info-file",
|
285 |
+
type=str,
|
286 |
+
required=True,
|
287 |
+
help="Huggingface API model's info file path",
|
288 |
+
)
|
289 |
+
|
290 |
+
parser.add_argument(
|
291 |
+
"--limit-worker-concurrency",
|
292 |
+
type=int,
|
293 |
+
default=5,
|
294 |
+
help="Limit the model concurrency to prevent OOM.",
|
295 |
+
)
|
296 |
+
parser.add_argument("--no-register", action="store_true")
|
297 |
+
parser.add_argument(
|
298 |
+
"--seed",
|
299 |
+
type=int,
|
300 |
+
default=None,
|
301 |
+
help="Overwrite the random seed for each generation.",
|
302 |
+
)
|
303 |
+
args = parser.parse_args()
|
304 |
+
|
305 |
+
with open(args.model_info_file, "r", encoding="UTF-8") as f:
|
306 |
+
model_info = json.load(f)
|
307 |
+
|
308 |
+
logger.info(f"args: {args}")
|
309 |
+
|
310 |
+
model_path_list = []
|
311 |
+
api_base_list = []
|
312 |
+
token_list = []
|
313 |
+
context_length_list = []
|
314 |
+
model_names_list = []
|
315 |
+
conv_template_list = []
|
316 |
+
|
317 |
+
for m in model_info:
|
318 |
+
model_path_list.append(model_info[m]["model_path"])
|
319 |
+
api_base_list.append(model_info[m]["api_base"])
|
320 |
+
token_list.append(model_info[m]["token"])
|
321 |
+
|
322 |
+
context_length = model_info[m]["context_length"]
|
323 |
+
model_names = model_info[m].get("model_names", [m.split("/")[-1]])
|
324 |
+
if isinstance(model_names, str):
|
325 |
+
model_names = [model_names]
|
326 |
+
conv_template = model_info[m].get("conv_template", None)
|
327 |
+
|
328 |
+
context_length_list.append(context_length)
|
329 |
+
model_names_list.append(model_names)
|
330 |
+
conv_template_list.append(conv_template)
|
331 |
+
|
332 |
+
logger.info(f"Model paths: {model_path_list}")
|
333 |
+
logger.info(f"API bases: {api_base_list}")
|
334 |
+
logger.info(f"Tokens: {token_list}")
|
335 |
+
logger.info(f"Context lengths: {context_length_list}")
|
336 |
+
logger.info(f"Model names: {model_names_list}")
|
337 |
+
logger.info(f"Conv templates: {conv_template_list}")
|
338 |
+
|
339 |
+
for (
|
340 |
+
model_names,
|
341 |
+
conv_template,
|
342 |
+
model_path,
|
343 |
+
api_base,
|
344 |
+
token,
|
345 |
+
context_length,
|
346 |
+
) in zip(
|
347 |
+
model_names_list,
|
348 |
+
conv_template_list,
|
349 |
+
model_path_list,
|
350 |
+
api_base_list,
|
351 |
+
token_list,
|
352 |
+
context_length_list,
|
353 |
+
):
|
354 |
+
m = HuggingfaceApiWorker(
|
355 |
+
args.controller_address,
|
356 |
+
args.worker_address,
|
357 |
+
worker_id,
|
358 |
+
model_path,
|
359 |
+
api_base,
|
360 |
+
token,
|
361 |
+
context_length,
|
362 |
+
model_names,
|
363 |
+
args.limit_worker_concurrency,
|
364 |
+
no_register=args.no_register,
|
365 |
+
conv_template=conv_template,
|
366 |
+
seed=args.seed,
|
367 |
+
)
|
368 |
+
workers.append(m)
|
369 |
+
for name in model_names:
|
370 |
+
worker_map[name] = m
|
371 |
+
|
372 |
+
# register all the models
|
373 |
+
url = args.controller_address + "/register_worker"
|
374 |
+
data = {
|
375 |
+
"worker_name": workers[0].worker_addr,
|
376 |
+
"check_heart_beat": not args.no_register,
|
377 |
+
"worker_status": {
|
378 |
+
"model_names": [m for w in workers for m in w.model_names],
|
379 |
+
"speed": 1,
|
380 |
+
"queue_length": sum([w.get_queue_length() for w in workers]),
|
381 |
+
},
|
382 |
+
}
|
383 |
+
r = requests.post(url, json=data)
|
384 |
+
assert r.status_code == 200
|
385 |
+
|
386 |
+
return args, workers
|
387 |
+
|
388 |
+
|
389 |
+
if __name__ == "__main__":
|
390 |
+
args, workers = create_huggingface_api_worker()
|
391 |
+
uvicorn.run(app, host=args.host, port=args.port, log_level="info")
|
inference.py
ADDED
@@ -0,0 +1,596 @@
1 |
+
"""Inference for FastChat models."""
|
2 |
+
import abc
|
3 |
+
import gc
|
4 |
+
import json
|
5 |
+
import math
|
6 |
+
import os
|
7 |
+
import sys
|
8 |
+
import time
|
9 |
+
from typing import Iterable, Optional, Dict
|
10 |
+
import warnings
|
11 |
+
|
12 |
+
import psutil
|
13 |
+
import torch
|
14 |
+
from transformers import (
|
15 |
+
AutoTokenizer,
|
16 |
+
AutoModelForCausalLM,
|
17 |
+
LlamaTokenizer,
|
18 |
+
LlamaForCausalLM,
|
19 |
+
AutoModel,
|
20 |
+
AutoModelForSeq2SeqLM,
|
21 |
+
T5Tokenizer,
|
22 |
+
AutoConfig,
|
23 |
+
)
|
24 |
+
from transformers.generation.logits_process import (
|
25 |
+
LogitsProcessorList,
|
26 |
+
RepetitionPenaltyLogitsProcessor,
|
27 |
+
TemperatureLogitsWarper,
|
28 |
+
TopKLogitsWarper,
|
29 |
+
TopPLogitsWarper,
|
30 |
+
)
|
31 |
+
|
32 |
+
from fastchat.conversation import get_conv_template, SeparatorStyle
|
33 |
+
from fastchat.model.model_adapter import (
|
34 |
+
load_model,
|
35 |
+
get_conversation_template,
|
36 |
+
get_generate_stream_function,
|
37 |
+
)
|
38 |
+
from fastchat.modules.awq import AWQConfig
|
39 |
+
from fastchat.modules.gptq import GptqConfig
|
40 |
+
from fastchat.modules.exllama import ExllamaConfig
|
41 |
+
from fastchat.modules.xfastertransformer import XftConfig
|
42 |
+
from fastchat.utils import is_partial_stop, is_sentence_complete, get_context_length
|
43 |
+
|
44 |
+
|
45 |
+
def prepare_logits_processor(
|
46 |
+
temperature: float, repetition_penalty: float, top_p: float, top_k: int
|
47 |
+
) -> LogitsProcessorList:
|
48 |
+
processor_list = LogitsProcessorList()
|
49 |
+
# TemperatureLogitsWarper doesn't accept 0.0, 1.0 makes it a no-op so we skip two cases.
|
50 |
+
if temperature >= 1e-5 and temperature != 1.0:
|
51 |
+
processor_list.append(TemperatureLogitsWarper(temperature))
|
52 |
+
if repetition_penalty > 1.0:
|
53 |
+
processor_list.append(RepetitionPenaltyLogitsProcessor(repetition_penalty))
|
54 |
+
if 1e-8 <= top_p < 1.0:
|
55 |
+
processor_list.append(TopPLogitsWarper(top_p))
|
56 |
+
if top_k > 0:
|
57 |
+
processor_list.append(TopKLogitsWarper(top_k))
|
58 |
+
return processor_list
|
59 |
+
|
60 |
+
|
61 |
+
@torch.inference_mode()
|
62 |
+
def generate_stream(
|
63 |
+
model,
|
64 |
+
tokenizer,
|
65 |
+
params: Dict,
|
66 |
+
device: str,
|
67 |
+
context_len: int,
|
68 |
+
stream_interval: int = 2,
|
69 |
+
judge_sent_end: bool = False,
|
70 |
+
):
|
71 |
+
if hasattr(model, "device"):
|
72 |
+
device = model.device
|
73 |
+
|
74 |
+
# Read parameters
|
75 |
+
prompt = params["prompt"]
|
76 |
+
len_prompt = len(prompt)
|
77 |
+
temperature = float(params.get("temperature", 1.0))
|
78 |
+
repetition_penalty = float(params.get("repetition_penalty", 1.0))
|
79 |
+
top_p = float(params.get("top_p", 1.0))
|
80 |
+
top_k = int(params.get("top_k", -1)) # -1 means disable
|
81 |
+
max_new_tokens = int(params.get("max_new_tokens", 256))
|
82 |
+
logprobs = params.get("logprobs", None) # FIXME: Support logprobs>1.
|
83 |
+
echo = bool(params.get("echo", True))
|
84 |
+
stop_str = params.get("stop", None)
|
85 |
+
stop_token_ids = params.get("stop_token_ids", None) or []
|
86 |
+
if tokenizer.eos_token_id not in stop_token_ids:
|
87 |
+
stop_token_ids.append(tokenizer.eos_token_id)
|
88 |
+
if params.get('none_stop'):
|
89 |
+
stop_token_ids = []
|
90 |
+
skip_special_tokens = params.get('skip_special_tokens')
|
91 |
+
|
92 |
+
logits_processor = prepare_logits_processor(
|
93 |
+
temperature, repetition_penalty, top_p, top_k
|
94 |
+
)
|
95 |
+
input_ids = tokenizer(prompt).input_ids
|
96 |
+
|
97 |
+
if model.config.is_encoder_decoder:
|
98 |
+
max_src_len = context_len
|
99 |
+
else: # truncate
|
100 |
+
max_src_len = context_len - max_new_tokens - 1
|
101 |
+
|
102 |
+
input_ids = input_ids[-max_src_len:]
|
103 |
+
output_ids = list(input_ids)
|
104 |
+
input_echo_len = len(input_ids)
|
105 |
+
|
106 |
+
if model.config.is_encoder_decoder:
|
107 |
+
if logprobs is not None: # FIXME: Support logprobs for encoder-decoder models.
|
108 |
+
raise NotImplementedError
|
109 |
+
encoder_output = model.encoder(
|
110 |
+
input_ids=torch.as_tensor([input_ids], device=device)
|
111 |
+
)[0]
|
112 |
+
start_ids = torch.as_tensor(
|
113 |
+
[[model.generation_config.decoder_start_token_id]],
|
114 |
+
dtype=torch.int64,
|
115 |
+
device=device,
|
116 |
+
)
|
117 |
+
else:
|
118 |
+
start_ids = torch.as_tensor([input_ids], device=device)
|
119 |
+
|
120 |
+
past_key_values = out = None
|
121 |
+
token_logprobs = [None] # The first token has no logprobs.
|
122 |
+
sent_interrupt = False
|
123 |
+
finish_reason = None
|
124 |
+
for i in range(max_new_tokens):
|
125 |
+
if i == 0: # prefill
|
126 |
+
if model.config.is_encoder_decoder:
|
127 |
+
out = model.decoder(
|
128 |
+
input_ids=start_ids,
|
129 |
+
encoder_hidden_states=encoder_output,
|
130 |
+
use_cache=True,
|
131 |
+
)
|
132 |
+
logits = model.lm_head(out[0])
|
133 |
+
else:
|
134 |
+
out = model(input_ids=start_ids, use_cache=True)
|
135 |
+
logits = out.logits
|
136 |
+
past_key_values = out.past_key_values
|
137 |
+
|
138 |
+
if logprobs is not None:
|
139 |
+
# Prefill logprobs for the prompt.
|
140 |
+
shift_input_ids = start_ids[..., 1:].contiguous()
|
141 |
+
shift_logits = logits[..., :-1, :].contiguous()
|
142 |
+
shift_logits = torch.log_softmax(shift_logits, dim=-1).tolist()
|
143 |
+
for label_id, logit in zip(
|
144 |
+
shift_input_ids[0].tolist(), shift_logits[0]
|
145 |
+
):
|
146 |
+
token_logprobs.append(logit[label_id])
|
147 |
+
else: # decoding
|
148 |
+
if model.config.is_encoder_decoder:
|
149 |
+
out = model.decoder(
|
150 |
+
input_ids=torch.as_tensor(
|
151 |
+
[[token] if not sent_interrupt else output_ids],
|
152 |
+
device=device,
|
153 |
+
),
|
154 |
+
encoder_hidden_states=encoder_output,
|
155 |
+
use_cache=True,
|
156 |
+
past_key_values=past_key_values if not sent_interrupt else None,
|
157 |
+
)
|
158 |
+
sent_interrupt = False
|
159 |
+
|
160 |
+
logits = model.lm_head(out[0])
|
161 |
+
else:
|
162 |
+
out = model(
|
163 |
+
input_ids=torch.as_tensor(
|
164 |
+
[[token] if not sent_interrupt else output_ids],
|
165 |
+
device=device,
|
166 |
+
),
|
167 |
+
use_cache=True,
|
168 |
+
past_key_values=past_key_values if not sent_interrupt else None,
|
169 |
+
)
|
170 |
+
sent_interrupt = False
|
171 |
+
logits = out.logits
|
172 |
+
past_key_values = out.past_key_values
|
173 |
+
|
174 |
+
if logits_processor:
|
175 |
+
if repetition_penalty > 1.0:
|
176 |
+
tmp_output_ids = torch.as_tensor([output_ids], device=logits.device)
|
177 |
+
else:
|
178 |
+
tmp_output_ids = None
|
179 |
+
last_token_logits = logits_processor(tmp_output_ids, logits[:, -1, :])[0]
|
180 |
+
else:
|
181 |
+
last_token_logits = logits[0, -1, :]
|
182 |
+
|
183 |
+
if device == "mps":
|
184 |
+
# Switch to CPU by avoiding some bugs in mps backend.
|
185 |
+
last_token_logits = last_token_logits.float().to("cpu")
|
186 |
+
|
187 |
+
if temperature < 1e-5 or top_p < 1e-8: # greedy
|
188 |
+
_, indices = torch.topk(last_token_logits, 2)
|
189 |
+
tokens = [int(index) for index in indices.tolist()]
|
190 |
+
else:
|
191 |
+
probs = torch.softmax(last_token_logits, dim=-1)
|
192 |
+
indices = torch.multinomial(probs, num_samples=2)
|
193 |
+
tokens = [int(token) for token in indices.tolist()]
|
194 |
+
token = tokens[0]
|
195 |
+
output_ids.append(token)
|
196 |
+
if logprobs is not None:
|
197 |
+
# Cannot use last_token_logits because logprobs is based on raw logits.
|
198 |
+
token_logprobs.append(
|
199 |
+
torch.log_softmax(logits[0, -1, :], dim=-1)[token].tolist()
|
200 |
+
)
|
201 |
+
|
202 |
+
if token in stop_token_ids:
|
203 |
+
stopped = True
|
204 |
+
else:
|
205 |
+
stopped = False
|
206 |
+
|
207 |
+
# Yield the output tokens
|
208 |
+
if i % stream_interval == 0 or i == max_new_tokens - 1 or stopped:
|
209 |
+
if echo:
|
210 |
+
tmp_output_ids = output_ids
|
211 |
+
rfind_start = len_prompt
|
212 |
+
else:
|
213 |
+
tmp_output_ids = output_ids[input_echo_len:]
|
214 |
+
rfind_start = 0
|
215 |
+
|
216 |
+
output = tokenizer.decode(
|
217 |
+
tmp_output_ids,
|
218 |
+
skip_special_tokens=skip_special_tokens,
|
219 |
+
spaces_between_special_tokens=False,
|
220 |
+
clean_up_tokenization_spaces=True,
|
221 |
+
)
|
222 |
+
ret_logprobs = None
|
223 |
+
if logprobs is not None:
|
224 |
+
ret_logprobs = {
|
225 |
+
"text_offset": [],
|
226 |
+
"tokens": [
|
227 |
+
tokenizer.decode(token)
|
228 |
+
for token in (
|
229 |
+
output_ids if echo else output_ids[input_echo_len:]
|
230 |
+
)
|
231 |
+
],
|
232 |
+
"token_logprobs": token_logprobs
|
233 |
+
if echo
|
234 |
+
else token_logprobs[input_echo_len:],
|
235 |
+
"top_logprobs": [{}]
|
236 |
+
* len(token_logprobs if echo else token_logprobs[input_echo_len:]),
|
237 |
+
}
|
238 |
+
# Compute text_offset
|
239 |
+
curr_pos = 0
|
240 |
+
for text in ret_logprobs["tokens"]:
|
241 |
+
ret_logprobs["text_offset"].append(curr_pos)
|
242 |
+
curr_pos += len(text)
|
243 |
+
|
244 |
+
# TODO: Patch for incomplete sentences interrupting the output; this could be handled more elegantly.
|
245 |
+
if judge_sent_end and stopped and not is_sentence_complete(output):
|
246 |
+
if len(tokens) > 1:
|
247 |
+
token = tokens[1]
|
248 |
+
output_ids[-1] = token
|
249 |
+
else:
|
250 |
+
output_ids.pop()
|
251 |
+
stopped = False
|
252 |
+
sent_interrupt = True
|
253 |
+
|
254 |
+
partially_stopped = False
|
255 |
+
if stop_str:
|
256 |
+
if isinstance(stop_str, str):
|
257 |
+
pos = output.rfind(stop_str, rfind_start)
|
258 |
+
if pos != -1:
|
259 |
+
output = output[:pos]
|
260 |
+
stopped = True
|
261 |
+
else:
|
262 |
+
partially_stopped = is_partial_stop(output, stop_str)
|
263 |
+
elif isinstance(stop_str, Iterable):
|
264 |
+
for each_stop in stop_str:
|
265 |
+
pos = output.rfind(each_stop, rfind_start)
|
266 |
+
if pos != -1:
|
267 |
+
output = output[:pos]
|
268 |
+
stopped = True
|
269 |
+
break
|
270 |
+
else:
|
271 |
+
partially_stopped = is_partial_stop(output, each_stop)
|
272 |
+
if partially_stopped:
|
273 |
+
break
|
274 |
+
else:
|
275 |
+
raise ValueError("Invalid stop field type.")
|
276 |
+
|
277 |
+
# Prevent yielding partial stop sequence
|
278 |
+
if not partially_stopped:
|
279 |
+
yield {
|
280 |
+
"text": output,
|
281 |
+
"logprobs": ret_logprobs,
|
282 |
+
"usage": {
|
283 |
+
"prompt_tokens": input_echo_len,
|
284 |
+
"completion_tokens": i,
|
285 |
+
"total_tokens": input_echo_len + i,
|
286 |
+
},
|
287 |
+
"finish_reason": None,
|
288 |
+
}
|
289 |
+
|
290 |
+
if stopped:
|
291 |
+
break
|
292 |
+
|
293 |
+
# Finish stream event, which contains finish reason
|
294 |
+
else:
|
295 |
+
finish_reason = "length"
|
296 |
+
|
297 |
+
if stopped:
|
298 |
+
finish_reason = "stop"
|
299 |
+
|
300 |
+
yield {
|
301 |
+
"text": output,
|
302 |
+
"logprobs": ret_logprobs,
|
303 |
+
"usage": {
|
304 |
+
"prompt_tokens": input_echo_len,
|
305 |
+
"completion_tokens": i,
|
306 |
+
"total_tokens": input_echo_len + i,
|
307 |
+
},
|
308 |
+
"finish_reason": finish_reason,
|
309 |
+
}
|
310 |
+
|
311 |
+
# Clean
|
312 |
+
del past_key_values, out
|
313 |
+
gc.collect()
|
314 |
+
torch.cuda.empty_cache()
|
315 |
+
if device == "xpu":
|
316 |
+
torch.xpu.empty_cache()
|
317 |
+
if device == "npu":
|
318 |
+
torch.npu.empty_cache()
|
319 |
+
|
320 |
+
|
321 |
+
class ChatIO(abc.ABC):
|
322 |
+
@abc.abstractmethod
|
323 |
+
def prompt_for_input(self, role: str) -> str:
|
324 |
+
"""Prompt for input from a role."""
|
325 |
+
|
326 |
+
@abc.abstractmethod
|
327 |
+
def prompt_for_output(self, role: str):
|
328 |
+
"""Prompt for output from a role."""
|
329 |
+
|
330 |
+
@abc.abstractmethod
|
331 |
+
def stream_output(self, output_stream):
|
332 |
+
"""Stream output."""
|
333 |
+
|
334 |
+
@abc.abstractmethod
|
335 |
+
def print_output(self, text: str):
|
336 |
+
"""Print output."""
|
337 |
+
|
338 |
+
|
339 |
+
def convert_message_format(message):
|
340 |
+
formated_message = []
|
341 |
+
for i, turn in enumerate(message):
|
342 |
+
role = 'user' if i % 2 == 0 else 'assistant'
|
343 |
+
formated_message.append({'role': role, 'content': turn[1]})
|
344 |
+
|
345 |
+
data = {
|
346 |
+
'conversations': formated_message,
|
347 |
+
'idx': -1,
|
348 |
+
'tinder': 'badcase',
|
349 |
+
'model': '',
|
350 |
+
'tokens_in': 0,
|
351 |
+
'tokens_out': 0,
|
352 |
+
}
|
353 |
+
|
354 |
+
return data
|
355 |
+
|
356 |
+
|
357 |
+
def chat_loop(
|
358 |
+
model_path: str,
|
359 |
+
device: str,
|
360 |
+
num_gpus: int,
|
361 |
+
max_gpu_memory: str,
|
362 |
+
dtype: Optional[torch.dtype],
|
363 |
+
load_8bit: bool,
|
364 |
+
cpu_offloading: bool,
|
365 |
+
conv_template: Optional[str],
|
366 |
+
conv_system_msg: Optional[str],
|
367 |
+
temperature: float,
|
368 |
+
repetition_penalty: float,
|
369 |
+
max_new_tokens: int,
|
370 |
+
chatio: ChatIO,
|
371 |
+
gptq_config: Optional[GptqConfig] = None,
|
372 |
+
awq_config: Optional[AWQConfig] = None,
|
373 |
+
exllama_config: Optional[ExllamaConfig] = None,
|
374 |
+
xft_config: Optional[XftConfig] = None,
|
375 |
+
revision: str = "main",
|
376 |
+
judge_sent_end: bool = True,
|
377 |
+
debug: bool = True,
|
378 |
+
history: bool = True,
|
379 |
+
):
|
380 |
+
# Model
|
381 |
+
model, tokenizer = load_model(
|
382 |
+
model_path,
|
383 |
+
device=device,
|
384 |
+
num_gpus=num_gpus,
|
385 |
+
max_gpu_memory=max_gpu_memory,
|
386 |
+
dtype=dtype,
|
387 |
+
load_8bit=load_8bit,
|
388 |
+
cpu_offloading=cpu_offloading,
|
389 |
+
gptq_config=gptq_config,
|
390 |
+
awq_config=awq_config,
|
391 |
+
exllama_config=exllama_config,
|
392 |
+
xft_config=xft_config,
|
393 |
+
revision=revision,
|
394 |
+
debug=debug,
|
395 |
+
)
|
396 |
+
generate_stream_func = get_generate_stream_function(model, model_path)
|
397 |
+
|
398 |
+
model_type = str(type(model)).lower()
|
399 |
+
is_t5 = "t5" in model_type
|
400 |
+
is_codet5p = "codet5p" in model_type
|
401 |
+
is_xft = "xft" in model_type
|
402 |
+
|
403 |
+
# Hardcode T5's default repetition penalty to be 1.2
|
404 |
+
if is_t5 and repetition_penalty == 1.0:
|
405 |
+
repetition_penalty = 1.2
|
406 |
+
|
407 |
+
# Set context length
|
408 |
+
context_len = get_context_length(model.config)
|
409 |
+
|
410 |
+
# Chat
|
411 |
+
def new_chat():
|
412 |
+
if conv_template:
|
413 |
+
conv = get_conv_template(conv_template)
|
414 |
+
else:
|
415 |
+
conv = get_conversation_template(model_path)
|
416 |
+
if conv_system_msg is not None:
|
417 |
+
conv.set_system_message(conv_system_msg)
|
418 |
+
return conv
|
419 |
+
|
420 |
+
def reload_conv(conv):
|
421 |
+
"""
|
422 |
+
Reprints the conversation from the start.
|
423 |
+
"""
|
424 |
+
for message in conv.messages[conv.offset :]:
|
425 |
+
chatio.prompt_for_output(message[0])
|
426 |
+
chatio.print_output(message[1])
|
427 |
+
|
428 |
+
conv = None
|
429 |
+
|
430 |
+
while True:
|
431 |
+
if not history or not conv:
|
432 |
+
conv = new_chat()
|
433 |
+
|
434 |
+
try:
|
435 |
+
inp = chatio.prompt_for_input(conv.roles[0])
|
436 |
+
except EOFError:
|
437 |
+
inp = ""
|
438 |
+
|
439 |
+
if inp == "!!exit":# or not inp:
|
440 |
+
print("exit...")
|
441 |
+
break
|
442 |
+
elif inp == "!!reset":
|
443 |
+
print("resetting...")
|
444 |
+
conv = new_chat()
|
445 |
+
continue
|
446 |
+
elif inp == "!!remove":
|
447 |
+
print("removing last message...")
|
448 |
+
if len(conv.messages) > conv.offset:
|
449 |
+
# Assistant
|
450 |
+
if conv.messages[-1][0] == conv.roles[1]:
|
451 |
+
conv.messages.pop()
|
452 |
+
# User
|
453 |
+
if conv.messages[-1][0] == conv.roles[0]:
|
454 |
+
conv.messages.pop()
|
455 |
+
reload_conv(conv)
|
456 |
+
else:
|
457 |
+
print("No messages to remove.")
|
458 |
+
continue
|
459 |
+
elif inp == "!!regen":
|
460 |
+
print("regenerating last message...")
|
461 |
+
if len(conv.messages) > conv.offset:
|
462 |
+
# Assistant
|
463 |
+
if conv.messages[-1][0] == conv.roles[1]:
|
464 |
+
conv.messages.pop()
|
465 |
+
# User
|
466 |
+
if conv.messages[-1][0] == conv.roles[0]:
|
467 |
+
reload_conv(conv)
|
468 |
+
# Set inp to previous message
|
469 |
+
inp = conv.messages.pop()[1]
|
470 |
+
else:
|
471 |
+
# Shouldn't happen in normal circumstances
|
472 |
+
print("No user message to regenerate from.")
|
473 |
+
continue
|
474 |
+
else:
|
475 |
+
print("No messages to regenerate.")
|
476 |
+
continue
|
477 |
+
elif inp.startswith("!!save"):
|
478 |
+
args = inp.split(" ", 1)
|
479 |
+
|
480 |
+
if len(args) != 2:
|
481 |
+
print("usage: !!save <filename>")
|
482 |
+
continue
|
483 |
+
else:
|
484 |
+
filename = args[1]
|
485 |
+
|
486 |
+
# Add .json if extension not present
|
487 |
+
if not "." in filename:
|
488 |
+
filename += ".json"
|
489 |
+
|
490 |
+
print("saving...", filename)
|
491 |
+
with open(filename, "w", encoding="utf-8") as outfile:
|
492 |
+
json.dump(conv.dict(), outfile, ensure_ascii=False)
|
493 |
+
continue
|
494 |
+
elif inp.startswith("!!badcase"):
|
495 |
+
args = inp.split(" ", 1)
|
496 |
+
|
497 |
+
if len(args) != 2:
|
498 |
+
print("usage: !!save <filename>")
|
499 |
+
continue
|
500 |
+
else:
|
501 |
+
filename = args[1]
|
502 |
+
|
503 |
+
# Add .jsonl if extension not present
|
504 |
+
if not "." in filename:
|
505 |
+
filename += ".jsonl"
|
506 |
+
|
507 |
+
print("saving...", filename)
|
508 |
+
with open(filename, "a+", encoding="utf-8") as outfile:
|
509 |
+
data = convert_message_format(conv.messages)
|
510 |
+
json.dump(data, outfile, ensure_ascii=False)
|
511 |
+
outfile.write('\n')
|
512 |
+
continue
|
513 |
+
elif inp.startswith("!!load"):
|
514 |
+
args = inp.split(" ", 1)
|
515 |
+
|
516 |
+
if len(args) != 2:
|
517 |
+
print("usage: !!load <filename>")
|
518 |
+
continue
|
519 |
+
else:
|
520 |
+
filename = args[1]
|
521 |
+
|
522 |
+
# Check if file exists and add .json if needed
|
523 |
+
if not os.path.exists(filename):
|
524 |
+
if (not filename.endswith(".json")) and os.path.exists(
|
525 |
+
filename + ".json"
|
526 |
+
):
|
527 |
+
filename += ".json"
|
528 |
+
else:
|
529 |
+
print("file not found:", filename)
|
530 |
+
continue
|
531 |
+
|
532 |
+
print("loading...", filename)
|
533 |
+
with open(filename, "r") as infile:
|
534 |
+
new_conv = json.load(infile)
|
535 |
+
|
536 |
+
conv = get_conv_template(new_conv["template_name"])
|
537 |
+
conv.set_system_message(new_conv["system_message"])
|
538 |
+
conv.messages = new_conv["messages"]
|
539 |
+
reload_conv(conv)
|
540 |
+
continue
|
541 |
+
|
542 |
+
conv.append_message(conv.roles[0], inp)
|
543 |
+
conv.append_message(conv.roles[1], None)
|
544 |
+
prompt = conv.get_prompt(tokenizer)
|
545 |
+
|
546 |
+
if is_codet5p: # codet5p is a code completion model.
|
547 |
+
prompt = inp
|
548 |
+
|
549 |
+
gen_params = {
|
550 |
+
"model": model_path,
|
551 |
+
"prompt": prompt,
|
552 |
+
"temperature": temperature,
|
553 |
+
"repetition_penalty": repetition_penalty,
|
554 |
+
"max_new_tokens": max_new_tokens,
|
555 |
+
"stop": conv.stop_str,
|
556 |
+
"stop_token_ids": conv.stop_token_ids,
|
557 |
+
"none_stop": conv.none_stop,
|
558 |
+
"skip_special_tokens": conv.skip_special_tokens,
|
559 |
+
"echo": False,
|
560 |
+
}
|
561 |
+
|
562 |
+
try:
|
563 |
+
chatio.prompt_for_output(conv.roles[1])
|
564 |
+
output_stream = generate_stream_func(
|
565 |
+
model,
|
566 |
+
tokenizer,
|
567 |
+
gen_params,
|
568 |
+
device,
|
569 |
+
context_len=context_len,
|
570 |
+
judge_sent_end=judge_sent_end,
|
571 |
+
)
|
572 |
+
t = time.time()
|
573 |
+
outputs = chatio.stream_output(output_stream)
|
574 |
+
duration = time.time() - t
|
575 |
+
conv.update_last_message(outputs.strip())
|
576 |
+
|
577 |
+
if debug:
|
578 |
+
num_tokens = len(tokenizer.encode(outputs))
|
579 |
+
msg = {
|
580 |
+
"conv_template": conv.name,
|
581 |
+
"prompt": prompt,
|
582 |
+
"outputs": outputs,
|
583 |
+
"speed (token/s)": round(num_tokens / duration, 2),
|
584 |
+
}
|
585 |
+
print(f"\n{msg}\n")
|
586 |
+
|
587 |
+
except KeyboardInterrupt:
|
588 |
+
print("stopped generation.")
|
589 |
+
# If generation didn't finish
|
590 |
+
if conv.messages[-1][1] is None:
|
591 |
+
conv.messages.pop()
|
592 |
+
# Remove last user message, so there isn't a double up
|
593 |
+
if conv.messages[-1][0] == conv.roles[0]:
|
594 |
+
conv.messages.pop()
|
595 |
+
|
596 |
+
reload_conv(conv)
|
launch_all_serve.py
ADDED
@@ -0,0 +1,284 @@
1 |
+
"""
|
2 |
+
Usage: python launch_all_serve.py --model-path-address "THUDM/chatglm2-6b@localhost@2021" "huggyllama/llama-7b@localhost@2022"
|
3 |
+
|
4 |
+
Workers are listed in format of `model-path`@`host`@`port`
|
5 |
+
|
6 |
+
The key mechanism behind this script is:
|
7 |
+
1. execute a shell command to launch the controller/worker/openai-api-server;
|
8 |
+
2. check the log of the controller/worker/openai-api-server to ensure that the server has launched properly.
|
9 |
+
Note that a few non-critical `fastchat.serve` cmd options are currently not supported.
|
10 |
+
"""
|
11 |
+
import sys
|
12 |
+
import os
|
13 |
+
|
14 |
+
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
15 |
+
|
16 |
+
import subprocess
|
17 |
+
import re
|
18 |
+
import argparse
|
19 |
+
|
20 |
+
LOGDIR = "./logs/"
|
21 |
+
|
22 |
+
if not os.path.exists(LOGDIR):
|
23 |
+
os.makedirs(LOGDIR)
|
24 |
+
|
25 |
+
parser = argparse.ArgumentParser()
|
26 |
+
# ------multi worker-----------------
|
27 |
+
parser.add_argument(
|
28 |
+
"--model-path-address",
|
29 |
+
default="THUDM/chatglm2-6b@localhost@20002",
|
30 |
+
nargs="+",
|
31 |
+
type=str,
|
32 |
+
help="model path, host, and port, formatted as model-path@host@port",
|
33 |
+
)
|
34 |
+
# ---------------controller-------------------------
|
35 |
+
|
36 |
+
parser.add_argument("--controller-host", type=str, default="localhost")
|
37 |
+
parser.add_argument("--controller-port", type=int, default=21001)
|
38 |
+
parser.add_argument(
|
39 |
+
"--dispatch-method",
|
40 |
+
type=str,
|
41 |
+
choices=["lottery", "shortest_queue"],
|
42 |
+
default="shortest_queue",
|
43 |
+
)
|
44 |
+
controller_args = ["controller-host", "controller-port", "dispatch-method"]
|
45 |
+
|
46 |
+
# ----------------------worker------------------------------------------
|
47 |
+
|
48 |
+
parser.add_argument("--worker-host", type=str, default="localhost")
|
49 |
+
parser.add_argument("--worker-port", type=int, default=21002)
|
50 |
+
# parser.add_argument("--worker-address", type=str, default="http://localhost:21002")
|
51 |
+
# parser.add_argument(
|
52 |
+
# "--controller-address", type=str, default="http://localhost:21001"
|
53 |
+
# )
|
54 |
+
parser.add_argument(
|
55 |
+
"--model-path",
|
56 |
+
type=str,
|
57 |
+
default="lmsys/vicuna-7b-v1.5",
|
58 |
+
help="The path to the weights. This can be a local folder or a Hugging Face repo ID.",
|
59 |
+
)
|
60 |
+
parser.add_argument(
|
61 |
+
"--revision",
|
62 |
+
type=str,
|
63 |
+
default="main",
|
64 |
+
help="Hugging Face Hub model revision identifier",
|
65 |
+
)
|
66 |
+
parser.add_argument(
|
67 |
+
"--device",
|
68 |
+
type=str,
|
69 |
+
choices=["cpu", "cuda", "mps", "xpu", "npu"],
|
70 |
+
default="cuda",
|
71 |
+
help="The device type",
|
72 |
+
)
|
73 |
+
parser.add_argument(
|
74 |
+
"--gpus",
|
75 |
+
type=str,
|
76 |
+
default="0",
|
77 |
+
help="A single GPU like 1 or multiple GPUs like 0,2",
|
78 |
+
)
|
79 |
+
parser.add_argument("--num-gpus", type=int, default=1)
|
80 |
+
parser.add_argument(
|
81 |
+
"--max-gpu-memory",
|
82 |
+
type=str,
|
83 |
+
help="The maximum memory per gpu. Use a string like '13Gib'",
|
84 |
+
)
|
85 |
+
parser.add_argument("--load-8bit", action="store_true", help="Use 8-bit quantization")
|
86 |
+
parser.add_argument(
|
87 |
+
"--cpu-offloading",
|
88 |
+
action="store_true",
|
89 |
+
help="Only when using 8-bit quantization: Offload excess weights to the CPU that don't fit on the GPU",
|
90 |
+
)
|
91 |
+
parser.add_argument(
|
92 |
+
"--gptq-ckpt",
|
93 |
+
type=str,
|
94 |
+
default=None,
|
95 |
+
help="Load quantized model. The path to the local GPTQ checkpoint.",
|
96 |
+
)
|
97 |
+
parser.add_argument(
|
98 |
+
"--gptq-wbits",
|
99 |
+
type=int,
|
100 |
+
default=16,
|
101 |
+
choices=[2, 3, 4, 8, 16],
|
102 |
+
help="#bits to use for quantization",
|
103 |
+
)
|
104 |
+
parser.add_argument(
|
105 |
+
"--gptq-groupsize",
|
106 |
+
type=int,
|
107 |
+
default=-1,
|
108 |
+
help="Groupsize to use for quantization; default uses full row.",
|
109 |
+
)
|
110 |
+
parser.add_argument(
|
111 |
+
"--gptq-act-order",
|
112 |
+
action="store_true",
|
113 |
+
help="Whether to apply the activation order GPTQ heuristic",
|
114 |
+
)
|
115 |
+
parser.add_argument(
|
116 |
+
"--model-names",
|
117 |
+
type=lambda s: s.split(","),
|
118 |
+
help="Optional display comma separated names",
|
119 |
+
)
|
120 |
+
parser.add_argument(
|
121 |
+
"--limit-worker-concurrency",
|
122 |
+
type=int,
|
123 |
+
default=5,
|
124 |
+
help="Limit the model concurrency to prevent OOM.",
|
125 |
+
)
|
126 |
+
parser.add_argument("--stream-interval", type=int, default=2)
|
127 |
+
parser.add_argument("--no-register", action="store_true")
|
128 |
+
|
129 |
+
worker_args = [
|
130 |
+
"worker-host",
|
131 |
+
"worker-port",
|
132 |
+
"model-path",
|
133 |
+
"revision",
|
134 |
+
"device",
|
135 |
+
"gpus",
|
136 |
+
"num-gpus",
|
137 |
+
"max-gpu-memory",
|
138 |
+
"load-8bit",
|
139 |
+
"cpu-offloading",
|
140 |
+
"gptq-ckpt",
|
141 |
+
"gptq-wbits",
|
142 |
+
"gptq-groupsize",
|
143 |
+
"gptq-act-order",
|
144 |
+
"model-names",
|
145 |
+
"limit-worker-concurrency",
|
146 |
+
"stream-interval",
|
147 |
+
"no-register",
|
148 |
+
"controller-address",
|
149 |
+
]
|
150 |
+
# -----------------openai server---------------------------
|
151 |
+
|
152 |
+
parser.add_argument("--server-host", type=str, default="localhost", help="host name")
|
153 |
+
parser.add_argument("--server-port", type=int, default=8001, help="port number")
|
154 |
+
parser.add_argument(
|
155 |
+
"--allow-credentials", action="store_true", help="allow credentials"
|
156 |
+
)
|
157 |
+
# parser.add_argument(
|
158 |
+
# "--allowed-origins", type=json.loads, default=["*"], help="allowed origins"
|
159 |
+
# )
|
160 |
+
# parser.add_argument(
|
161 |
+
# "--allowed-methods", type=json.loads, default=["*"], help="allowed methods"
|
162 |
+
# )
|
163 |
+
# parser.add_argument(
|
164 |
+
# "--allowed-headers", type=json.loads, default=["*"], help="allowed headers"
|
165 |
+
# )
|
166 |
+
parser.add_argument(
|
167 |
+
"--api-keys",
|
168 |
+
type=lambda s: s.split(","),
|
169 |
+
help="Optional list of comma separated API keys",
|
170 |
+
)
|
171 |
+
server_args = [
|
172 |
+
"server-host",
|
173 |
+
"server-port",
|
174 |
+
"allow-credentials",
|
175 |
+
"api-keys",
|
176 |
+
"controller-address",
|
177 |
+
]
|
178 |
+
|
179 |
+
args = parser.parse_args()
|
180 |
+
|
181 |
+
args = argparse.Namespace(
|
182 |
+
**vars(args),
|
183 |
+
**{"controller-address": f"http://{args.controller_host}:{args.controller_port}"},
|
184 |
+
)
|
185 |
+
|
186 |
+
if args.gpus:
|
187 |
+
if len(args.gpus.split(",")) < args.num_gpus:
|
188 |
+
raise ValueError(
|
189 |
+
f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
|
190 |
+
)
|
191 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
|
192 |
+
|
193 |
+
# 0,controller, model_worker, openai_api_server
|
194 |
+
# 1, cmd options
|
195 |
+
# 2,LOGDIR
|
196 |
+
# 3, log file name
|
197 |
+
base_launch_sh = "nohup python3 -m fastchat.serve.{0} {1} >{2}/{3}.log 2>&1 &"
|
198 |
+
|
199 |
+
# 0 LOGDIR
|
200 |
+
#! 1 log file name
|
201 |
+
# 2 controller, worker, openai_api_server
|
202 |
+
base_check_sh = """while [ `grep -c "Uvicorn running on" {0}/{1}.log` -eq '0' ];do
|
203 |
+
sleep 1s;
|
204 |
+
echo "wait {2} running"
|
205 |
+
done
|
206 |
+
echo '{2} running' """
|
207 |
+
|
208 |
+
|
209 |
+
def string_args(args, args_list):
|
210 |
+
args_str = ""
|
211 |
+
for key, value in args._get_kwargs():
|
212 |
+
key = key.replace("_", "-")
|
213 |
+
if key not in args_list:
|
214 |
+
continue
|
215 |
+
|
216 |
+
key = key.split("-")[-1] if re.search("port|host", key) else key
|
217 |
+
if not value:
|
218 |
+
pass
|
219 |
+
# True booleans are emitted as bare flags
|
220 |
+
elif isinstance(value, bool) and value:
|
221 |
+
args_str += f" --{key} "
|
222 |
+
elif (
|
223 |
+
isinstance(value, list)
|
224 |
+
or isinstance(value, tuple)
|
225 |
+
or isinstance(value, set)
|
226 |
+
):
|
227 |
+
value = " ".join(value)
|
228 |
+
args_str += f" --{key} {value} "
|
229 |
+
else:
|
230 |
+
args_str += f" --{key} {value} "
|
231 |
+
|
232 |
+
return args_str
|
233 |
+
|
234 |
+
|
235 |
+
def launch_worker(item):
|
236 |
+
log_name = (
|
237 |
+
item.split("/")[-1]
|
238 |
+
.split("\\")[-1]
|
239 |
+
.replace("-", "_")
|
240 |
+
.replace("@", "_")
|
241 |
+
.replace(".", "_")
|
242 |
+
)
|
243 |
+
|
244 |
+
args.model_path, args.worker_host, args.worker_port = item.split("@")
|
245 |
+
print("*" * 80)
|
246 |
+
worker_str_args = string_args(args, worker_args)
|
247 |
+
print(worker_str_args)
|
248 |
+
worker_sh = base_launch_sh.format(
|
249 |
+
"model_worker", worker_str_args, LOGDIR, f"worker_{log_name}"
|
250 |
+
)
|
251 |
+
worker_check_sh = base_check_sh.format(LOGDIR, f"worker_{log_name}", "model_worker")
|
252 |
+
subprocess.run(worker_sh, shell=True, check=True)
|
253 |
+
subprocess.run(worker_check_sh, shell=True, check=True)
|
254 |
+
|
255 |
+
|
256 |
+
def launch_all():
|
257 |
+
controller_str_args = string_args(args, controller_args)
|
258 |
+
controller_sh = base_launch_sh.format(
|
259 |
+
"controller", controller_str_args, LOGDIR, "controller"
|
260 |
+
)
|
261 |
+
controller_check_sh = base_check_sh.format(LOGDIR, "controller", "controller")
|
262 |
+
subprocess.run(controller_sh, shell=True, check=True)
|
263 |
+
subprocess.run(controller_check_sh, shell=True, check=True)
|
264 |
+
|
265 |
+
if isinstance(args.model_path_address, str):
|
266 |
+
launch_worker(args.model_path_address)
|
267 |
+
else:
|
268 |
+
for idx, item in enumerate(args.model_path_address):
|
269 |
+
print(f"loading {idx}th model:{item}")
|
270 |
+
launch_worker(item)
|
271 |
+
|
272 |
+
server_str_args = string_args(args, server_args)
|
273 |
+
server_sh = base_launch_sh.format(
|
274 |
+
"openai_api_server", server_str_args, LOGDIR, "openai_api_server"
|
275 |
+
)
|
276 |
+
server_check_sh = base_check_sh.format(
|
277 |
+
LOGDIR, "openai_api_server", "openai_api_server"
|
278 |
+
)
|
279 |
+
subprocess.run(server_sh, shell=True, check=True)
|
280 |
+
subprocess.run(server_check_sh, shell=True, check=True)
|
281 |
+
|
282 |
+
|
283 |
+
if __name__ == "__main__":
|
284 |
+
launch_all()
|
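A standalone sketch of what `string_args` produces: it keeps only the whitelisted options, shortens `worker-host`/`worker-port` to `--host`/`--port`, and turns true booleans into bare flags. The Namespace values below are made up, and the isinstance chain is condensed relative to the original.

```python
# Illustrative only: condensed restatement of string_args() with invented values.
import argparse
import re


def string_args(args, args_list):
    args_str = ""
    for key, value in args._get_kwargs():  # sorted (name, value) pairs
        key = key.replace("_", "-")
        if key not in args_list:
            continue
        # "worker-host" -> "host", "worker-port" -> "port"
        key = key.split("-")[-1] if re.search("port|host", key) else key
        if not value:
            continue
        if isinstance(value, bool):
            args_str += f" --{key} "  # true booleans become bare flags
        elif isinstance(value, (list, tuple, set)):
            args_str += f" --{key} {' '.join(value)} "
        else:
            args_str += f" --{key} {value} "
    return args_str


ns = argparse.Namespace(
    worker_host="localhost",
    worker_port=21002,
    model_path="lmsys/vicuna-7b-v1.5",
    load_8bit=True,
)
print(string_args(ns, ["worker-host", "worker-port", "model-path", "load-8bit"]))
#  --load-8bit  --model-path lmsys/vicuna-7b-v1.5  --host localhost  --port 21002
```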
model_worker.py
ADDED
@@ -0,0 +1,363 @@
1 |
+
"""
|
2 |
+
A model worker that executes the model.
|
3 |
+
"""
|
4 |
+
import argparse
|
5 |
+
import base64
|
6 |
+
import gc
|
7 |
+
import json
|
8 |
+
import os
|
9 |
+
from typing import List, Optional
|
10 |
+
import uuid
|
11 |
+
|
12 |
+
import torch
|
13 |
+
import torch.nn.functional as F
|
14 |
+
from transformers import set_seed
|
15 |
+
import uvicorn
|
16 |
+
|
17 |
+
from fastchat.constants import ErrorCode, SERVER_ERROR_MSG
|
18 |
+
from fastchat.model.model_adapter import (
|
19 |
+
load_model,
|
20 |
+
add_model_args,
|
21 |
+
get_generate_stream_function,
|
22 |
+
)
|
23 |
+
from fastchat.modules.awq import AWQConfig
|
24 |
+
from fastchat.modules.exllama import ExllamaConfig
|
25 |
+
from fastchat.modules.xfastertransformer import XftConfig
|
26 |
+
from fastchat.modules.gptq import GptqConfig
|
27 |
+
from fastchat.serve.base_model_worker import BaseModelWorker, app
|
28 |
+
from fastchat.utils import (
|
29 |
+
build_logger,
|
30 |
+
get_context_length,
|
31 |
+
str_to_torch_dtype,
|
32 |
+
)
|
33 |
+
|
34 |
+
|
35 |
+
worker_id = str(uuid.uuid4())[:8]
|
36 |
+
logger = build_logger("model_worker", f"model_worker_{worker_id}.log")
|
37 |
+
|
38 |
+
|
39 |
+
class ModelWorker(BaseModelWorker):
|
40 |
+
def __init__(
|
41 |
+
self,
|
42 |
+
controller_addr: str,
|
43 |
+
worker_addr: str,
|
44 |
+
worker_id: str,
|
45 |
+
model_path: str,
|
46 |
+
model_names: List[str],
|
47 |
+
limit_worker_concurrency: int,
|
48 |
+
no_register: bool,
|
49 |
+
device: str,
|
50 |
+
num_gpus: int,
|
51 |
+
max_gpu_memory: str,
|
52 |
+
dtype: Optional[torch.dtype] = None,
|
53 |
+
load_8bit: bool = False,
|
54 |
+
cpu_offloading: bool = False,
|
55 |
+
gptq_config: Optional[GptqConfig] = None,
|
56 |
+
awq_config: Optional[AWQConfig] = None,
|
57 |
+
exllama_config: Optional[ExllamaConfig] = None,
|
58 |
+
xft_config: Optional[XftConfig] = None,
|
59 |
+
stream_interval: int = 2,
|
60 |
+
conv_template: Optional[str] = None,
|
61 |
+
embed_in_truncate: bool = False,
|
62 |
+
seed: Optional[int] = None,
|
63 |
+
debug: bool = False,
|
64 |
+
**kwargs,
|
65 |
+
):
|
66 |
+
super().__init__(
|
67 |
+
controller_addr,
|
68 |
+
worker_addr,
|
69 |
+
worker_id,
|
70 |
+
model_path,
|
71 |
+
model_names,
|
72 |
+
limit_worker_concurrency,
|
73 |
+
conv_template=conv_template,
|
74 |
+
)
|
75 |
+
|
76 |
+
logger.info(f"Loading the model {self.model_names} on worker {worker_id} ...")
|
77 |
+
self.model, self.tokenizer = load_model(
|
78 |
+
model_path,
|
79 |
+
device=device,
|
80 |
+
num_gpus=num_gpus,
|
81 |
+
max_gpu_memory=max_gpu_memory,
|
82 |
+
dtype=dtype,
|
83 |
+
load_8bit=load_8bit,
|
84 |
+
cpu_offloading=cpu_offloading,
|
85 |
+
gptq_config=gptq_config,
|
86 |
+
awq_config=awq_config,
|
87 |
+
exllama_config=exllama_config,
|
88 |
+
xft_config=xft_config,
|
89 |
+
debug=debug,
|
90 |
+
model_name=model_names[0],
|
91 |
+
)
|
92 |
+
self.device = device
|
93 |
+
if self.tokenizer.pad_token is None:
|
94 |
+
self.tokenizer.pad_token = self.tokenizer.eos_token
|
95 |
+
self.context_len = get_context_length(self.model.config)
|
96 |
+
self.generate_stream_func = get_generate_stream_function(self.model, model_path)
|
97 |
+
self.stream_interval = stream_interval
|
98 |
+
self.embed_in_truncate = embed_in_truncate
|
99 |
+
self.seed = seed
|
100 |
+
|
101 |
+
if not no_register:
|
102 |
+
self.init_heart_beat()
|
103 |
+
|
104 |
+
def generate_stream_gate(self, params):
|
105 |
+
self.call_ct += 1
|
106 |
+
|
107 |
+
try:
|
108 |
+
if self.seed is not None:
|
109 |
+
set_seed(self.seed)
|
110 |
+
for output in self.generate_stream_func(
|
111 |
+
self.model,
|
112 |
+
self.tokenizer,
|
113 |
+
params,
|
114 |
+
self.device,
|
115 |
+
self.context_len,
|
116 |
+
self.stream_interval,
|
117 |
+
):
|
118 |
+
ret = {
|
119 |
+
"text": output["text"],
|
120 |
+
"error_code": 0,
|
121 |
+
}
|
122 |
+
if "usage" in output:
|
123 |
+
ret["usage"] = output["usage"]
|
124 |
+
if "finish_reason" in output:
|
125 |
+
ret["finish_reason"] = output["finish_reason"]
|
126 |
+
if "logprobs" in output:
|
127 |
+
ret["logprobs"] = output["logprobs"]
|
128 |
+
yield json.dumps(ret).encode() + b"\0"
|
129 |
+
except torch.cuda.OutOfMemoryError as e:
|
130 |
+
ret = {
|
131 |
+
"text": f"{SERVER_ERROR_MSG}\n\n({e})",
|
132 |
+
"error_code": ErrorCode.CUDA_OUT_OF_MEMORY,
|
133 |
+
}
|
134 |
+
yield json.dumps(ret).encode() + b"\0"
|
135 |
+
except (ValueError, RuntimeError) as e:
|
136 |
+
ret = {
|
137 |
+
"text": f"{SERVER_ERROR_MSG}\n\n({e})",
|
138 |
+
"error_code": ErrorCode.INTERNAL_ERROR,
|
139 |
+
}
|
140 |
+
yield json.dumps(ret).encode() + b"\0"
|
141 |
+
|
142 |
+
def generate_gate(self, params):
|
143 |
+
for x in self.generate_stream_gate(params):
|
144 |
+
pass
|
145 |
+
return json.loads(x[:-1].decode())
|
146 |
+
|
147 |
+
def __process_embed_chunk(self, input_ids, attention_mask, **model_type_dict):
|
148 |
+
if model_type_dict.get("is_bert"):
|
149 |
+
model_output = self.model(input_ids)
|
150 |
+
if model_type_dict.get("is_robert"):
|
151 |
+
data = model_output.last_hidden_state
|
152 |
+
else:
|
153 |
+
data = model_output[0]
|
154 |
+
elif model_type_dict.get("is_t5"):
|
155 |
+
model_output = self.model(input_ids, decoder_input_ids=input_ids)
|
156 |
+
data = model_output.encoder_last_hidden_state
|
157 |
+
else:
|
158 |
+
model_output = self.model(input_ids, output_hidden_states=True)
|
159 |
+
if model_type_dict.get("is_chatglm"):
|
160 |
+
data = model_output.hidden_states[-1].transpose(0, 1)
|
161 |
+
else:
|
162 |
+
data = model_output.hidden_states[-1]
|
163 |
+
mask = attention_mask.unsqueeze(-1).expand(data.size()).float()
|
164 |
+
masked_embeddings = data * mask
|
165 |
+
sum_embeddings = torch.sum(masked_embeddings, dim=1)
|
166 |
+
token_num = torch.sum(attention_mask).item()
|
167 |
+
|
168 |
+
return sum_embeddings, token_num
|
169 |
+
|
170 |
+
def __encode_base64(self, embeddings: torch.Tensor) -> List[str]:
|
171 |
+
embeddings = embeddings.cpu()
|
172 |
+
return [
|
173 |
+
base64.b64encode(e.numpy().tobytes()).decode("utf-8") for e in embeddings
|
174 |
+
]
|
175 |
+
|
176 |
+
@torch.inference_mode()
|
177 |
+
def get_embeddings(self, params):
|
178 |
+
self.call_ct += 1
|
179 |
+
|
180 |
+
try:
|
181 |
+
tokenizer = self.tokenizer
|
182 |
+
ret = {"embedding": [], "token_num": 0}
|
183 |
+
|
184 |
+
model_type_dict = {
|
185 |
+
"is_llama": "llama" in str(type(self.model)),
|
186 |
+
"is_t5": "t5" in str(type(self.model)),
|
187 |
+
"is_chatglm": "chatglm" in str(type(self.model)),
|
188 |
+
"is_bert": "bert" in str(type(self.model)),
|
189 |
+
"is_robert": "robert" in str(type(self.model)),
|
190 |
+
}
|
191 |
+
|
192 |
+
if self.embed_in_truncate:
|
193 |
+
encoding = tokenizer.batch_encode_plus(
|
194 |
+
params["input"],
|
195 |
+
padding=True,
|
196 |
+
truncation="longest_first",
|
197 |
+
return_tensors="pt",
|
198 |
+
max_length=self.context_len,
|
199 |
+
)
|
200 |
+
else:
|
201 |
+
encoding = tokenizer.batch_encode_plus(
|
202 |
+
params["input"], padding=True, return_tensors="pt"
|
203 |
+
)
|
204 |
+
input_ids = encoding["input_ids"].to(self.device)
|
205 |
+
attention_mask = input_ids != tokenizer.pad_token_id
|
206 |
+
|
207 |
+
base64_encode = params.get("encoding_format", None)
|
208 |
+
|
209 |
+
if self.embed_in_truncate:
|
210 |
+
chunk_embeddings, token_num = self.__process_embed_chunk(
|
211 |
+
input_ids, attention_mask, **model_type_dict
|
212 |
+
)
|
213 |
+
embedding = chunk_embeddings / token_num
|
214 |
+
normalized_embeddings = F.normalize(embedding, p=2, dim=1)
|
215 |
+
ret["token_num"] = token_num
|
216 |
+
else:
|
217 |
+
all_embeddings = []
|
218 |
+
all_token_num = 0
|
219 |
+
for i in range(0, input_ids.size(1), self.context_len):
|
220 |
+
chunk_input_ids = input_ids[:, i : i + self.context_len]
|
221 |
+
chunk_attention_mask = attention_mask[:, i : i + self.context_len]
|
222 |
+
|
223 |
+
chunk_embeddings, token_num = self.__process_embed_chunk(
|
224 |
+
chunk_input_ids, chunk_attention_mask, **model_type_dict
|
225 |
+
)
|
226 |
+
all_embeddings.append(chunk_embeddings)
|
227 |
+
all_token_num += token_num
|
228 |
+
|
229 |
+
all_embeddings_tensor = torch.stack(all_embeddings)
|
230 |
+
embedding = torch.sum(all_embeddings_tensor, dim=0) / all_token_num
|
231 |
+
normalized_embeddings = F.normalize(embedding, p=2, dim=1)
|
232 |
+
|
233 |
+
ret["token_num"] = all_token_num
|
234 |
+
|
235 |
+
if base64_encode == "base64":
|
236 |
+
out_embeddings = self.__encode_base64(normalized_embeddings)
|
237 |
+
else:
|
238 |
+
out_embeddings = normalized_embeddings.tolist()
|
239 |
+
ret["embedding"] = out_embeddings
|
240 |
+
|
241 |
+
gc.collect()
|
242 |
+
torch.cuda.empty_cache()
|
243 |
+
if self.device == "xpu":
|
244 |
+
torch.xpu.empty_cache()
|
245 |
+
if self.device == "npu":
|
246 |
+
torch.npu.empty_cache()
|
247 |
+
except torch.cuda.OutOfMemoryError as e:
|
248 |
+
ret = {
|
249 |
+
"text": f"{SERVER_ERROR_MSG}\n\n({e})",
|
250 |
+
"error_code": ErrorCode.CUDA_OUT_OF_MEMORY,
|
251 |
+
}
|
252 |
+
except (ValueError, RuntimeError) as e:
|
253 |
+
ret = {
|
254 |
+
"text": f"{SERVER_ERROR_MSG}\n\n({e})",
|
255 |
+
"error_code": ErrorCode.INTERNAL_ERROR,
|
256 |
+
}
|
257 |
+
return ret
|
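A small sketch of the chunked mean-pooling that `get_embeddings` performs, run on random tensors instead of real model hidden states so it stays self-contained.

```python
# Illustrative only: masked mean-pooling over context_len-sized chunks,
# followed by L2 normalization, as in get_embeddings() above.
import torch
import torch.nn.functional as F

context_len = 4
hidden = torch.randn(2, 10, 8)          # (batch, tokens, hidden_dim), stand-in data
attention_mask = torch.ones(2, 10)

chunk_sums, token_num = [], 0
for i in range(0, hidden.size(1), context_len):
    h = hidden[:, i : i + context_len]
    m = attention_mask[:, i : i + context_len].unsqueeze(-1)
    chunk_sums.append((h * m).sum(dim=1))              # masked sum over tokens
    token_num += attention_mask[:, i : i + context_len].sum().item()

embedding = torch.stack(chunk_sums).sum(dim=0) / token_num
embedding = F.normalize(embedding, p=2, dim=1)          # unit-length rows
print(embedding.shape)                                   # torch.Size([2, 8])
```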
258 |
+
|
259 |
+
|
260 |
+
def create_model_worker():
|
261 |
+
parser = argparse.ArgumentParser()
|
262 |
+
parser.add_argument("--host", type=str, default="localhost")
|
263 |
+
parser.add_argument("--port", type=int, default=21002)
|
264 |
+
parser.add_argument("--worker-address", type=str, default="http://localhost:21002")
|
265 |
+
parser.add_argument(
|
266 |
+
"--controller-address", type=str, default="http://localhost:21001"
|
267 |
+
)
|
268 |
+
add_model_args(parser)
|
269 |
+
parser.add_argument(
|
270 |
+
"--model-names",
|
271 |
+
type=lambda s: s.split(","),
|
272 |
+
help="Optional display comma separated names",
|
273 |
+
)
|
274 |
+
parser.add_argument(
|
275 |
+
"--conv-template", type=str, default=None, help="Conversation prompt template."
|
276 |
+
)
|
277 |
+
parser.add_argument("--embed-in-truncate", action="store_true")
|
278 |
+
parser.add_argument(
|
279 |
+
"--limit-worker-concurrency",
|
280 |
+
type=int,
|
281 |
+
default=5,
|
282 |
+
help="Limit the model concurrency to prevent OOM.",
|
283 |
+
)
|
284 |
+
parser.add_argument("--stream-interval", type=int, default=2)
|
285 |
+
parser.add_argument("--no-register", action="store_true")
|
286 |
+
parser.add_argument(
|
287 |
+
"--seed",
|
288 |
+
type=int,
|
289 |
+
default=None,
|
290 |
+
help="Overwrite the random seed for each generation.",
|
291 |
+
)
|
292 |
+
parser.add_argument(
|
293 |
+
"--debug", type=bool, default=False, help="Print debugging messages"
|
294 |
+
)
|
295 |
+
args = parser.parse_args()
|
296 |
+
logger.info(f"args: {args}")
|
297 |
+
|
298 |
+
if args.gpus:
|
299 |
+
if len(args.gpus.split(",")) < args.num_gpus:
|
300 |
+
raise ValueError(
|
301 |
+
f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!"
|
302 |
+
)
|
303 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
|
304 |
+
|
305 |
+
gptq_config = GptqConfig(
|
306 |
+
ckpt=args.gptq_ckpt or args.model_path,
|
307 |
+
wbits=args.gptq_wbits,
|
308 |
+
groupsize=args.gptq_groupsize,
|
309 |
+
act_order=args.gptq_act_order,
|
310 |
+
)
|
311 |
+
awq_config = AWQConfig(
|
312 |
+
ckpt=args.awq_ckpt or args.model_path,
|
313 |
+
wbits=args.awq_wbits,
|
314 |
+
groupsize=args.awq_groupsize,
|
315 |
+
)
|
316 |
+
if args.enable_exllama:
|
317 |
+
exllama_config = ExllamaConfig(
|
318 |
+
max_seq_len=args.exllama_max_seq_len,
|
319 |
+
gpu_split=args.exllama_gpu_split,
|
320 |
+
)
|
321 |
+
else:
|
322 |
+
exllama_config = None
|
323 |
+
if args.enable_xft:
|
324 |
+
xft_config = XftConfig(
|
325 |
+
max_seq_len=args.xft_max_seq_len,
|
326 |
+
data_type=args.xft_dtype,
|
327 |
+
)
|
328 |
+
if args.device != "cpu":
|
329 |
+
print("xFasterTransformer now is only support CPUs. Reset device to CPU")
|
330 |
+
args.device = "cpu"
|
331 |
+
else:
|
332 |
+
xft_config = None
|
333 |
+
|
334 |
+
worker = ModelWorker(
|
335 |
+
args.controller_address,
|
336 |
+
args.worker_address,
|
337 |
+
worker_id,
|
338 |
+
args.model_path,
|
339 |
+
args.model_names,
|
340 |
+
args.limit_worker_concurrency,
|
341 |
+
no_register=args.no_register,
|
342 |
+
device=args.device,
|
343 |
+
num_gpus=args.num_gpus,
|
344 |
+
max_gpu_memory=args.max_gpu_memory,
|
345 |
+
dtype=str_to_torch_dtype(args.dtype),
|
346 |
+
load_8bit=args.load_8bit,
|
347 |
+
cpu_offloading=args.cpu_offloading,
|
348 |
+
gptq_config=gptq_config,
|
349 |
+
awq_config=awq_config,
|
350 |
+
exllama_config=exllama_config,
|
351 |
+
xft_config=xft_config,
|
352 |
+
stream_interval=args.stream_interval,
|
353 |
+
conv_template=args.conv_template,
|
354 |
+
embed_in_truncate=args.embed_in_truncate,
|
355 |
+
seed=args.seed,
|
356 |
+
debug=args.debug,
|
357 |
+
)
|
358 |
+
return args, worker
|
359 |
+
|
360 |
+
|
361 |
+
if __name__ == "__main__":
|
362 |
+
args, worker = create_model_worker()
|
363 |
+
uvicorn.run(app, host=args.host, port=args.port, log_level="info")
|
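The worker streams its responses as JSON chunks separated by a null byte (see `generate_stream_gate` above). A hedged sketch of how a client could decode that framing; `raw_chunks` stands in for bytes read from a streaming HTTP response.

```python
# Illustrative only: decoding the b"\0"-delimited JSON frames produced by
# generate_stream_gate(); raw_chunks is invented data, not a live response.
import json

raw_chunks = [
    json.dumps({"text": "Hi", "error_code": 0}).encode() + b"\0",
    json.dumps({"text": "Hi there", "error_code": 0, "finish_reason": "stop"}).encode()
    + b"\0",
]

buffer = b""
for piece in raw_chunks:  # in practice: iterate over a streaming HTTP body
    buffer += piece
    while b"\0" in buffer:
        frame, buffer = buffer.split(b"\0", 1)
        if frame:
            msg = json.loads(frame)
            print(msg["text"], msg.get("finish_reason"))
```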
monitor/basic_stats.py
ADDED
@@ -0,0 +1,210 @@
1 |
+
import argparse
|
2 |
+
import code
|
3 |
+
import datetime
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
from pytz import timezone
|
7 |
+
import time
|
8 |
+
|
9 |
+
import pandas as pd # pandas>=2.0.3
|
10 |
+
import plotly.express as px
|
11 |
+
import plotly.graph_objects as go
|
12 |
+
from tqdm import tqdm
|
13 |
+
|
14 |
+
|
15 |
+
NUM_SERVERS = 14
|
16 |
+
|
17 |
+
|
18 |
+
def get_log_files(max_num_files=None):
|
19 |
+
dates = []
|
20 |
+
for month in range(4, 12):
|
21 |
+
for day in range(1, 33):
|
22 |
+
dates.append(f"2023-{month:02d}-{day:02d}")
|
23 |
+
|
24 |
+
filenames = []
|
25 |
+
for d in dates:
|
26 |
+
for i in range(NUM_SERVERS):
|
27 |
+
name = os.path.expanduser(f"~/fastchat_logs/server{i}/{d}-conv.json")
|
28 |
+
if os.path.exists(name):
|
29 |
+
filenames.append(name)
|
30 |
+
max_num_files = max_num_files or len(filenames)
|
31 |
+
filenames = filenames[-max_num_files:]
|
32 |
+
return filenames
|
33 |
+
|
34 |
+
|
35 |
+
def load_log_files(log_files):
|
36 |
+
data = []
|
37 |
+
for filename in tqdm(log_files, desc="read files"):
|
38 |
+
for retry in range(5):
|
39 |
+
try:
|
40 |
+
lines = open(filename).readlines()
|
41 |
+
break
|
42 |
+
except FileNotFoundError:
|
43 |
+
time.sleep(2)
|
44 |
+
|
45 |
+
for l in lines:
|
46 |
+
row = json.loads(l)
|
47 |
+
|
48 |
+
data.append(
|
49 |
+
dict(
|
50 |
+
type=row["type"],
|
51 |
+
tstamp=row["tstamp"],
|
52 |
+
model=row.get("model", ""),
|
53 |
+
models=row.get("models", ["", ""]),
|
54 |
+
)
|
55 |
+
)
|
56 |
+
|
57 |
+
return data
|
58 |
+
|
59 |
+
|
60 |
+
def get_anony_vote_df(df):
|
61 |
+
anony_vote_df = df[
|
62 |
+
df["type"].isin(["leftvote", "rightvote", "tievote", "bothbad_vote"])
|
63 |
+
]
|
64 |
+
anony_vote_df = anony_vote_df[anony_vote_df["models"].apply(lambda x: x[0] == "")]
|
65 |
+
return anony_vote_df
|
66 |
+
|
67 |
+
|
68 |
+
def merge_counts(series, on, names):
|
69 |
+
ret = pd.merge(series[0], series[1], on=on)
|
70 |
+
for i in range(2, len(series)):
|
71 |
+
ret = pd.merge(ret, series[i], on=on)
|
72 |
+
ret = ret.reset_index()
|
73 |
+
old_names = list(ret.columns)[-len(series) :]
|
74 |
+
rename = {old_name: new_name for old_name, new_name in zip(old_names, names)}
|
75 |
+
ret = ret.rename(columns=rename)
|
76 |
+
return ret
|
77 |
+
|
78 |
+
|
79 |
+
def report_basic_stats(log_files):
|
80 |
+
df_all = load_log_files(log_files)
|
81 |
+
df_all = pd.DataFrame(df_all)
|
82 |
+
now_t = df_all["tstamp"].max()
|
83 |
+
df_1_hour = df_all[df_all["tstamp"] > (now_t - 3600)]
|
84 |
+
df_1_day = df_all[df_all["tstamp"] > (now_t - 3600 * 24)]
|
85 |
+
anony_vote_df_all = get_anony_vote_df(df_all)
|
86 |
+
|
87 |
+
# Chat trends
|
88 |
+
chat_dates = [
|
89 |
+
datetime.datetime.fromtimestamp(x, tz=timezone("US/Pacific")).strftime(
|
90 |
+
"%Y-%m-%d"
|
91 |
+
)
|
92 |
+
for x in df_all[df_all["type"] == "chat"]["tstamp"]
|
93 |
+
]
|
94 |
+
chat_dates_counts = pd.value_counts(chat_dates)
|
95 |
+
vote_dates = [
|
96 |
+
datetime.datetime.fromtimestamp(x, tz=timezone("US/Pacific")).strftime(
|
97 |
+
"%Y-%m-%d"
|
98 |
+
)
|
99 |
+
for x in anony_vote_df_all["tstamp"]
|
100 |
+
]
|
101 |
+
vote_dates_counts = pd.value_counts(vote_dates)
|
102 |
+
chat_dates_bar = go.Figure(
|
103 |
+
data=[
|
104 |
+
go.Bar(
|
105 |
+
name="Anony. Vote",
|
106 |
+
x=vote_dates_counts.index,
|
107 |
+
y=vote_dates_counts,
|
108 |
+
text=[f"{val:.0f}" for val in vote_dates_counts],
|
109 |
+
textposition="auto",
|
110 |
+
),
|
111 |
+
go.Bar(
|
112 |
+
name="Chat",
|
113 |
+
x=chat_dates_counts.index,
|
114 |
+
y=chat_dates_counts,
|
115 |
+
text=[f"{val:.0f}" for val in chat_dates_counts],
|
116 |
+
textposition="auto",
|
117 |
+
),
|
118 |
+
]
|
119 |
+
)
|
120 |
+
chat_dates_bar.update_layout(
|
121 |
+
barmode="stack",
|
122 |
+
xaxis_title="Dates",
|
123 |
+
yaxis_title="Count",
|
124 |
+
height=300,
|
125 |
+
width=1200,
|
126 |
+
)
|
127 |
+
|
128 |
+
# Model call counts
|
129 |
+
model_hist_all = df_all[df_all["type"] == "chat"]["model"].value_counts()
|
130 |
+
model_hist_1_day = df_1_day[df_1_day["type"] == "chat"]["model"].value_counts()
|
131 |
+
model_hist_1_hour = df_1_hour[df_1_hour["type"] == "chat"]["model"].value_counts()
|
132 |
+
model_hist = merge_counts(
|
133 |
+
[model_hist_all, model_hist_1_day, model_hist_1_hour],
|
134 |
+
on="model",
|
135 |
+
names=["All", "Last Day", "Last Hour"],
|
136 |
+
)
|
137 |
+
model_hist_md = model_hist.to_markdown(index=False, tablefmt="github")
|
138 |
+
|
139 |
+
# Action counts
|
140 |
+
action_hist_all = df_all["type"].value_counts()
|
141 |
+
action_hist_1_day = df_1_day["type"].value_counts()
|
142 |
+
action_hist_1_hour = df_1_hour["type"].value_counts()
|
143 |
+
action_hist = merge_counts(
|
144 |
+
[action_hist_all, action_hist_1_day, action_hist_1_hour],
|
145 |
+
on="type",
|
146 |
+
names=["All", "Last Day", "Last Hour"],
|
147 |
+
)
|
148 |
+
action_hist_md = action_hist.to_markdown(index=False, tablefmt="github")
|
149 |
+
|
150 |
+
# Anony vote counts
|
151 |
+
anony_vote_hist_all = anony_vote_df_all["type"].value_counts()
|
152 |
+
anony_vote_df_1_day = get_anony_vote_df(df_1_day)
|
153 |
+
anony_vote_hist_1_day = anony_vote_df_1_day["type"].value_counts()
|
154 |
+
# anony_vote_df_1_hour = get_anony_vote_df(df_1_hour)
|
155 |
+
# anony_vote_hist_1_hour = anony_vote_df_1_hour["type"].value_counts()
|
156 |
+
anony_vote_hist = merge_counts(
|
157 |
+
[anony_vote_hist_all, anony_vote_hist_1_day],
|
158 |
+
on="type",
|
159 |
+
names=["All", "Last Day"],
|
160 |
+
)
|
161 |
+
anony_vote_hist_md = anony_vote_hist.to_markdown(index=False, tablefmt="github")
|
162 |
+
|
163 |
+
# Last 24 hours
|
164 |
+
chat_1_day = df_1_day[df_1_day["type"] == "chat"]
|
165 |
+
num_chats_last_24_hours = []
|
166 |
+
base = df_1_day["tstamp"].min()
|
167 |
+
for i in range(24, 0, -1):
|
168 |
+
left = base + (i - 1) * 3600
|
169 |
+
right = base + i * 3600
|
170 |
+
num = ((chat_1_day["tstamp"] >= left) & (chat_1_day["tstamp"] < right)).sum()
|
171 |
+
num_chats_last_24_hours.append(num)
|
172 |
+
times = [
|
173 |
+
datetime.datetime.fromtimestamp(
|
174 |
+
base + i * 3600, tz=timezone("US/Pacific")
|
175 |
+
).strftime("%Y-%m-%d %H:%M:%S %Z")
|
176 |
+
for i in range(24, 0, -1)
|
177 |
+
]
|
178 |
+
last_24_hours_df = pd.DataFrame({"time": times, "value": num_chats_last_24_hours})
|
179 |
+
last_24_hours_md = last_24_hours_df.to_markdown(index=False, tablefmt="github")
|
180 |
+
|
181 |
+
# Last update datetime
|
182 |
+
last_updated_tstamp = now_t
|
183 |
+
last_updated_datetime = datetime.datetime.fromtimestamp(
|
184 |
+
last_updated_tstamp, tz=timezone("US/Pacific")
|
185 |
+
).strftime("%Y-%m-%d %H:%M:%S %Z")
|
186 |
+
|
187 |
+
# code.interact(local=locals())
|
188 |
+
|
189 |
+
return {
|
190 |
+
"chat_dates_bar": chat_dates_bar,
|
191 |
+
"model_hist_md": model_hist_md,
|
192 |
+
"action_hist_md": action_hist_md,
|
193 |
+
"anony_vote_hist_md": anony_vote_hist_md,
|
194 |
+
"num_chats_last_24_hours": last_24_hours_md,
|
195 |
+
"last_updated_datetime": last_updated_datetime,
|
196 |
+
}
|
197 |
+
|
198 |
+
|
199 |
+
if __name__ == "__main__":
|
200 |
+
parser = argparse.ArgumentParser()
|
201 |
+
parser.add_argument("--max-num-files", type=int)
|
202 |
+
args = parser.parse_args()
|
203 |
+
|
204 |
+
log_files = get_log_files(args.max_num_files)
|
205 |
+
basic_stats = report_basic_stats(log_files)
|
206 |
+
|
207 |
+
print(basic_stats["action_hist_md"] + "\n")
|
208 |
+
print(basic_stats["model_hist_md"] + "\n")
|
209 |
+
print(basic_stats["anony_vote_hist_md"] + "\n")
|
210 |
+
print(basic_stats["num_chats_last_24_hours"] + "\n")
|
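The `merge_counts` helper above joins several `value_counts` results and renames the count columns. The same idea written with explicit DataFrames on made-up numbers:

```python
# Illustrative only: the merge-and-rename idea behind merge_counts(),
# spelled out with explicit DataFrames and invented counts.
import pandas as pd

all_counts = pd.Series({"vicuna-13b": 120, "gpt-3.5-turbo": 80}, name="count")
day_counts = pd.Series({"vicuna-13b": 30, "gpt-3.5-turbo": 10}, name="count")

merged = pd.merge(
    all_counts.rename_axis("model").reset_index(),
    day_counts.rename_axis("model").reset_index(),
    on="model",
)
merged.columns = ["model", "All", "Last Day"]
print(merged.to_markdown(index=False, tablefmt="github"))
```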
monitor/clean_battle_data.py
ADDED
@@ -0,0 +1,269 @@
1 |
+
"""
|
2 |
+
Clean chatbot arena battle log.
|
3 |
+
|
4 |
+
Usage:
|
5 |
+
python3 clean_battle_data.py --mode conv_release
|
6 |
+
"""
|
7 |
+
import argparse
|
8 |
+
import datetime
|
9 |
+
import json
|
10 |
+
import os
|
11 |
+
from pytz import timezone
|
12 |
+
import time
|
13 |
+
|
14 |
+
from tqdm import tqdm
|
15 |
+
|
16 |
+
from fastchat.serve.monitor.basic_stats import get_log_files, NUM_SERVERS
|
17 |
+
from fastchat.utils import detect_language
|
18 |
+
|
19 |
+
|
20 |
+
VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]
|
21 |
+
IDENTITY_WORDS = [
|
22 |
+
"vicuna",
|
23 |
+
"lmsys",
|
24 |
+
"koala",
|
25 |
+
"uc berkeley",
|
26 |
+
"open assistant",
|
27 |
+
"laion",
|
28 |
+
"chatglm",
|
29 |
+
"chatgpt",
|
30 |
+
"openai",
|
31 |
+
"anthropic",
|
32 |
+
"claude",
|
33 |
+
"bard",
|
34 |
+
"palm",
|
35 |
+
"lamda",
|
36 |
+
"google",
|
37 |
+
"llama",
|
38 |
+
"NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.",
|
39 |
+
"$MODERATION$ YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES.",
|
40 |
+
]
|
41 |
+
|
42 |
+
for i in range(len(IDENTITY_WORDS)):
|
43 |
+
IDENTITY_WORDS[i] = IDENTITY_WORDS[i].lower()
|
44 |
+
|
45 |
+
|
46 |
+
def get_log_files(max_num_files=None):
|
47 |
+
dates = []
|
48 |
+
for month in range(4, 12):
|
49 |
+
for day in range(1, 33):
|
50 |
+
dates.append(f"2023-{month:02d}-{day:02d}")
|
51 |
+
|
52 |
+
filenames = []
|
53 |
+
for d in dates:
|
54 |
+
for i in range(NUM_SERVERS):
|
55 |
+
name = os.path.expanduser(f"~/fastchat_logs/server{i}/{d}-conv.json")
|
56 |
+
if os.path.exists(name):
|
57 |
+
filenames.append(name)
|
58 |
+
max_num_files = max_num_files or len(filenames)
|
59 |
+
filenames = filenames[-max_num_files:]
|
60 |
+
return filenames
|
61 |
+
|
62 |
+
|
63 |
+
def remove_html(raw):
|
64 |
+
if raw.startswith("<h3>"):
|
65 |
+
return raw[raw.find(": ") + 2 : -len("</h3>\n")]
|
66 |
+
return raw
|
67 |
+
|
68 |
+
|
69 |
+
def to_openai_format(messages):
|
70 |
+
roles = ["user", "assistant"]
|
71 |
+
ret = []
|
72 |
+
for i, x in enumerate(messages):
|
73 |
+
ret.append({"role": roles[i % 2], "content": x[1]})
|
74 |
+
return ret
|
75 |
+
|
76 |
+
|
77 |
+
def replace_model_name(old_name):
|
78 |
+
return (
|
79 |
+
old_name.replace("bard", "palm-2")
|
80 |
+
.replace("claude-v1", "claude-1")
|
81 |
+
.replace("claude-instant-v1", "claude-instant-1")
|
82 |
+
.replace("oasst-sft-1-pythia-12b", "oasst-pythia-12b")
|
83 |
+
)
|
84 |
+
|
85 |
+
|
86 |
+
def clean_battle_data(log_files, exclude_model_names):
|
87 |
+
data = []
|
88 |
+
for filename in tqdm(log_files, desc="read files"):
|
89 |
+
for retry in range(5):
|
90 |
+
try:
|
91 |
+
lines = open(filename).readlines()
|
92 |
+
break
|
93 |
+
except FileNotFoundError:
|
94 |
+
time.sleep(2)
|
95 |
+
|
96 |
+
for l in lines:
|
97 |
+
row = json.loads(l)
|
98 |
+
if row["type"] in VOTES:
|
99 |
+
data.append(row)
|
100 |
+
|
101 |
+
convert_type = {
|
102 |
+
"leftvote": "model_a",
|
103 |
+
"rightvote": "model_b",
|
104 |
+
"tievote": "tie",
|
105 |
+
"bothbad_vote": "tie (bothbad)",
|
106 |
+
}
|
107 |
+
|
108 |
+
all_models = set()
|
109 |
+
all_ips = dict()
|
110 |
+
ct_anony = 0
|
111 |
+
ct_invalid = 0
|
112 |
+
ct_leaked_identity = 0
|
113 |
+
battles = []
|
114 |
+
for row in data:
|
115 |
+
if row["models"][0] is None or row["models"][1] is None:
|
116 |
+
continue
|
117 |
+
|
118 |
+
# Resolve model names
|
119 |
+
models_public = [remove_html(row["models"][0]), remove_html(row["models"][1])]
|
120 |
+
if "model_name" in row["states"][0]:
|
121 |
+
models_hidden = [
|
122 |
+
row["states"][0]["model_name"],
|
123 |
+
row["states"][1]["model_name"],
|
124 |
+
]
|
125 |
+
if models_hidden[0] is None:
|
126 |
+
models_hidden = models_public
|
127 |
+
else:
|
128 |
+
models_hidden = models_public
|
129 |
+
|
130 |
+
if (models_public[0] == "" and models_public[1] != "") or (
|
131 |
+
models_public[1] == "" and models_public[0] != ""
|
132 |
+
):
|
133 |
+
ct_invalid += 1
|
134 |
+
continue
|
135 |
+
|
136 |
+
if models_public[0] == "" or models_public[0] == "Model A":
|
137 |
+
anony = True
|
138 |
+
models = models_hidden
|
139 |
+
ct_anony += 1
|
140 |
+
else:
|
141 |
+
anony = False
|
142 |
+
models = models_public
|
143 |
+
if not models_public == models_hidden:
|
144 |
+
ct_invalid += 1
|
145 |
+
continue
|
146 |
+
|
147 |
+
# Detect language
|
148 |
+
state = row["states"][0]
|
149 |
+
if state["offset"] >= len(state["messages"]):
|
150 |
+
ct_invalid += 1
|
151 |
+
continue
|
152 |
+
lang_code = detect_language(state["messages"][state["offset"]][1])
|
153 |
+
|
154 |
+
# Drop conversations if the model names are leaked
|
155 |
+
leaked_identity = False
|
156 |
+
messages = ""
|
157 |
+
for i in range(2):
|
158 |
+
state = row["states"][i]
|
159 |
+
for role, msg in state["messages"][state["offset"] :]:
|
160 |
+
if msg:
|
161 |
+
messages += msg.lower()
|
162 |
+
for word in IDENTITY_WORDS:
|
163 |
+
if word in messages:
|
164 |
+
leaked_identity = True
|
165 |
+
break
|
166 |
+
|
167 |
+
if leaked_identity:
|
168 |
+
ct_leaked_identity += 1
|
169 |
+
continue
|
170 |
+
|
171 |
+
# Replace bard with palm
|
172 |
+
models = [replace_model_name(m) for m in models]
|
173 |
+
|
174 |
+
# Exclude certain models
|
175 |
+
if any(x in exclude_model_names for x in models):
|
176 |
+
ct_invalid += 1
|
177 |
+
continue
|
178 |
+
|
179 |
+
question_id = row["states"][0]["conv_id"]
|
180 |
+
conversation_a = to_openai_format(
|
181 |
+
row["states"][0]["messages"][row["states"][0]["offset"] :]
|
182 |
+
)
|
183 |
+
conversation_b = to_openai_format(
|
184 |
+
row["states"][1]["messages"][row["states"][1]["offset"] :]
|
185 |
+
)
|
186 |
+
|
187 |
+
ip = row["ip"]
|
188 |
+
if ip not in all_ips:
|
189 |
+
all_ips[ip] = len(all_ips)
|
190 |
+
user_id = all_ips[ip]
|
191 |
+
|
192 |
+
# Save the results
|
193 |
+
battles.append(
|
194 |
+
dict(
|
195 |
+
question_id=question_id,
|
196 |
+
model_a=models[0],
|
197 |
+
model_b=models[1],
|
198 |
+
winner=convert_type[row["type"]],
|
199 |
+
judge=f"arena_user_{user_id}",
|
200 |
+
conversation_a=conversation_a,
|
201 |
+
conversation_b=conversation_b,
|
202 |
+
turn=len(conversation_a) // 2,
|
203 |
+
anony=anony,
|
204 |
+
language=lang_code,
|
205 |
+
tstamp=row["tstamp"],
|
206 |
+
)
|
207 |
+
)
|
208 |
+
|
209 |
+
all_models.update(models_hidden)
|
210 |
+
battles.sort(key=lambda x: x["tstamp"])
|
211 |
+
last_updated_tstamp = battles[-1]["tstamp"]
|
212 |
+
|
213 |
+
last_updated_datetime = datetime.datetime.fromtimestamp(
|
214 |
+
last_updated_tstamp, tz=timezone("US/Pacific")
|
215 |
+
).strftime("%Y-%m-%d %H:%M:%S %Z")
|
216 |
+
|
217 |
+
print(
|
218 |
+
f"#votes: {len(data)}, #invalid votes: {ct_invalid}, "
|
219 |
+
f"#leaked_identity: {ct_leaked_identity}"
|
220 |
+
)
|
221 |
+
print(f"#battles: {len(battles)}, #anony: {ct_anony}")
|
222 |
+
print(f"#models: {len(all_models)}, {all_models}")
|
223 |
+
print(f"last-updated: {last_updated_datetime}")
|
224 |
+
|
225 |
+
return battles
|
226 |
+
|
227 |
+
|
228 |
+
if __name__ == "__main__":
|
229 |
+
parser = argparse.ArgumentParser()
|
230 |
+
parser.add_argument("--max-num-files", type=int)
|
231 |
+
parser.add_argument(
|
232 |
+
"--mode", type=str, choices=["simple", "conv_release"], default="simple"
|
233 |
+
)
|
234 |
+
parser.add_argument("--exclude-model-names", type=str, nargs="+")
|
235 |
+
args = parser.parse_args()
|
236 |
+
|
237 |
+
log_files = get_log_files(args.max_num_files)
|
238 |
+
battles = clean_battle_data(log_files, args.exclude_model_names or [])
|
239 |
+
last_updated_tstamp = battles[-1]["tstamp"]
|
240 |
+
cutoff_date = datetime.datetime.fromtimestamp(
|
241 |
+
last_updated_tstamp, tz=timezone("US/Pacific")
|
242 |
+
).strftime("%Y%m%d")
|
243 |
+
|
244 |
+
if args.mode == "simple":
|
245 |
+
for x in battles:
|
246 |
+
for key in [
|
247 |
+
"conversation_a",
|
248 |
+
"conversation_b",
|
249 |
+
"question_id",
|
250 |
+
]:
|
251 |
+
del x[key]
|
252 |
+
print("Samples:")
|
253 |
+
for i in range(4):
|
254 |
+
print(battles[i])
|
255 |
+
output = f"clean_battle_{cutoff_date}.json"
|
256 |
+
elif args.mode == "conv_release":
|
257 |
+
new_battles = []
|
258 |
+
for x in battles:
|
259 |
+
if not x["anony"]:
|
260 |
+
continue
|
261 |
+
for key in []:
|
262 |
+
del x[key]
|
263 |
+
new_battles.append(x)
|
264 |
+
battles = new_battles
|
265 |
+
output = f"clean_battle_conv_{cutoff_date}.json"
|
266 |
+
|
267 |
+
with open(output, "w") as fout:
|
268 |
+
json.dump(battles, fout, indent=2, ensure_ascii=False)
|
269 |
+
print(f"Write cleaned data to {output}")
|
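For clarity, a tiny sketch of `to_openai_format` from above: roles are assigned purely by position (even index becomes user, odd becomes assistant). The messages are made up.

```python
# Illustrative only: positional role assignment as done by to_openai_format().
def to_openai_format(messages):
    roles = ["user", "assistant"]
    return [{"role": roles[i % 2], "content": x[1]} for i, x in enumerate(messages)]


state_messages = [
    ("USER", "Which model are you?"),
    ("ASSISTANT", "I am a large language model."),
]
print(to_openai_format(state_messages))
# [{'role': 'user', 'content': 'Which model are you?'},
#  {'role': 'assistant', 'content': 'I am a large language model.'}]
```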
monitor/clean_chat_data.py
ADDED
@@ -0,0 +1,171 @@
1 |
+
"""
|
2 |
+
Clean chatbot arena chat log.
|
3 |
+
|
4 |
+
Usage:
|
5 |
+
python3 clean_chat_data.py --mode conv_release
|
6 |
+
"""
|
7 |
+
import argparse
|
8 |
+
import datetime
|
9 |
+
import json
|
10 |
+
import os
|
11 |
+
from pytz import timezone
|
12 |
+
import time
|
13 |
+
|
14 |
+
from tqdm import tqdm
|
15 |
+
|
16 |
+
from fastchat.serve.monitor.basic_stats import NUM_SERVERS
|
17 |
+
from fastchat.serve.monitor.clean_battle_data import (
|
18 |
+
to_openai_format,
|
19 |
+
replace_model_name,
|
20 |
+
)
|
21 |
+
from fastchat.utils import detect_language
|
22 |
+
|
23 |
+
|
24 |
+
NETWORK_ERROR_MSG = (
|
25 |
+
"NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.".lower()
|
26 |
+
)
|
27 |
+
|
28 |
+
|
29 |
+
def get_log_files(max_num_files=None):
|
30 |
+
dates = []
|
31 |
+
for month in range(4, 12):
|
32 |
+
for day in range(1, 33):
|
33 |
+
dates.append(f"2023-{month:02d}-{day:02d}")
|
34 |
+
|
35 |
+
filenames = []
|
36 |
+
for d in dates:
|
37 |
+
for i in range(NUM_SERVERS):
|
38 |
+
name = os.path.expanduser(f"~/fastchat_logs/server{i}/{d}-conv.json")
|
39 |
+
if os.path.exists(name):
|
40 |
+
filenames.append(name)
|
41 |
+
max_num_files = max_num_files or len(filenames)
|
42 |
+
# filenames = list(reversed(filenames))
|
43 |
+
filenames = filenames[-max_num_files:]
|
44 |
+
return filenames
|
45 |
+
|
46 |
+
|
47 |
+
def clean_chat_data(log_files, action_type):
|
48 |
+
raw_data = []
|
49 |
+
for filename in tqdm(log_files, desc="read files"):
|
50 |
+
for retry in range(5):
|
51 |
+
try:
|
52 |
+
lines = open(filename).readlines()
|
53 |
+
break
|
54 |
+
except FileNotFoundError:
|
55 |
+
time.sleep(2)
|
56 |
+
|
57 |
+
for l in lines:
|
58 |
+
row = json.loads(l)
|
59 |
+
if row["type"] == action_type:
|
60 |
+
raw_data.append(row)
|
61 |
+
|
62 |
+
all_models = set()
    all_ips = dict()

    chats = []
    ct_invalid_conv_id = 0
    ct_invalid = 0
    ct_network_error = 0
    for row in raw_data:
        try:
            if action_type in ["chat", "upvote", "downvote"]:
                state = row["state"]
                model = row["model"]
            elif action_type == "leftvote":
                state = row["states"][0]
                model = row["states"][0]["model_name"]
            elif action_type == "rightvote":
                state = row["states"][1]
                model = row["states"][1]["model_name"]
            conversation_id = state["conv_id"]
        except KeyError:
            ct_invalid_conv_id += 1
            continue

        if conversation_id is None:
            ct_invalid_conv_id += 1
            continue

        conversation = to_openai_format(state["messages"][state["offset"] :])
        if not isinstance(model, str):
            ct_invalid += 1
            continue
        model = replace_model_name(model)

        try:
            lang_code = detect_language(state["messages"][state["offset"]][1])
        except IndexError:
            ct_invalid += 1
            continue

        if not all(isinstance(x["content"], str) for x in conversation):
            ct_invalid += 1
            continue

        messages = "".join([x["content"] for x in conversation]).lower()
        if NETWORK_ERROR_MSG in messages:
            ct_network_error += 1
            continue

        ip = row["ip"]
        if ip not in all_ips:
            all_ips[ip] = len(all_ips)
        user_id = all_ips[ip]

        chats.append(
            dict(
                conversation_id=conversation_id,
                model=model,
                conversation=conversation,
                turn=len(conversation) // 2,
                language=lang_code,
                user_id=user_id,
                tstamp=row["tstamp"],
            )
        )

        all_models.update([model])

    chats.sort(key=lambda x: x["tstamp"])
    last_updated_tstamp = chats[-1]["tstamp"]
    last_updated_datetime = datetime.datetime.fromtimestamp(
        last_updated_tstamp, tz=timezone("US/Pacific")
    ).strftime("%Y-%m-%d %H:%M:%S %Z")

    # Deduplication: keep only the latest snapshot of each conversation_id
    dedup_chats = []
    visited_conv_ids = set()
    for i in reversed(range(len(chats))):
        if chats[i]["conversation_id"] in visited_conv_ids:
            continue
        visited_conv_ids.add(chats[i]["conversation_id"])
        dedup_chats.append(chats[i])

    print(
        f"#raw: {len(raw_data)}, #chat: {len(chats)}, #dedup_chat: {len(dedup_chats)}"
    )
    print(
        f"#invalid_conv_id: {ct_invalid_conv_id}, #network_error: {ct_network_error}, #invalid: {ct_invalid}"
    )
    print(f"#models: {len(all_models)}, {all_models}")
    print(f"last-updated: {last_updated_datetime}")

    return list(reversed(dedup_chats))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--action-type", type=str, default="chat")
    parser.add_argument("--max-num-files", type=int)
    args = parser.parse_args()

    log_files = get_log_files(args.max_num_files)
    chats = clean_chat_data(log_files, args.action_type)
    last_updated_tstamp = chats[-1]["tstamp"]
    cutoff_date = datetime.datetime.fromtimestamp(
        last_updated_tstamp, tz=timezone("US/Pacific")
    ).strftime("%Y%m%d")

    output = f"clean_{args.action_type}_conv_{cutoff_date}.json"
    with open(output, "w") as fout:
        json.dump(chats, fout, indent=2, ensure_ascii=False)
    print(f"Write cleaned data to {output}")
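For a quick sanity check of the emitted file, the cleaned JSON loads straight into pandas; a minimal sketch (the file name follows the pattern printed above and is only illustrative):

```python
# Minimal sanity-check sketch for the cleaned chat file written above.
# The exact file name depends on the action type and cutoff date.
import pandas as pd

df = pd.read_json("clean_chat_conv_20231101.json")
print(df["model"].value_counts())      # chats per model
print(df["language"].value_counts())   # chats per detected language
print(df["turn"].mean())               # average number of turns
```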
monitor/dataset_release_scripts/arena_33k/count_unique_users.py
ADDED
@@ -0,0 +1,25 @@
"""Count the unique users in a battle log file."""

import argparse
import json


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str)
    args = parser.parse_args()

    lines = json.load(open(args.input))
    ct_anony_votes = 0
    all_users = set()
    all_models = set()
    for l in lines:
        if not l["anony"]:
            continue
        all_users.add(l["judge"])
        all_models.add(l["model_a"])
        all_models.add(l["model_b"])
        ct_anony_votes += 1

    print(f"#anony_vote: {ct_anony_votes}, #user: {len(all_users)}")
    print(f"#model: {len(all_models)}")
monitor/dataset_release_scripts/arena_33k/filter_bad_conv.py
ADDED
@@ -0,0 +1,155 @@
"""
Filter conversations for release.

Usage: python3 filter_bad_conv.py --in clean_battle_conv_20230630_tagged_v1_pii.json
"""
import argparse
from collections import defaultdict
from enum import Enum, auto
import json
import os
import random

from tqdm import tqdm

BLOCKED_WORDS_FILENAME = "blocked_words.json"
blocked_words = []
frequency = defaultdict(lambda: 0)


class TypeCode(Enum):
    CORRECT = auto()
    ANONYMIZED = auto()
    REDACTED = auto()
    BAD_FORMAT = auto()
    BLOCKED_WORD = auto()
    BLOCKED_MODEL = auto()
    TOO_SHORT = auto()
    TOO_FREQUENT = auto()


def detect_type(conv):
    for key in ["conversation_a", "conversation_b"]:
        messages = [row["content"] for row in conv[key]]
        for msg in messages:
            if not isinstance(msg, str):
                return TypeCode.BAD_FORMAT

        user_prompts = [
            row["content"].lower().strip() for row in conv[key] if row["role"] == "user"
        ]
        if len(messages) <= 2 and all(len(x) < 16 for x in user_prompts):
            return TypeCode.TOO_SHORT

        if all(x in frequent_prompts for x in user_prompts):
            return TypeCode.TOO_FREQUENT

        for msg in messages:
            msg = msg.lower()
            if "<anonymized>" in msg:
                return TypeCode.ANONYMIZED
            if "<redacted>" in msg:
                return TypeCode.REDACTED

            for w in blocked_words:
                if w in msg:
                    return TypeCode.BLOCKED_WORD

    for key in ["model_a", "model_b"]:
        if conv[key] in ["vicuna-33b", "mpt-30b-chat"]:
            return TypeCode.BLOCKED_MODEL

    return TypeCode.CORRECT


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--in-file", type=str, required=True)
    parser.add_argument("--sample", type=int)
    args = parser.parse_args()

    # Read conversations
    convs = json.load(open(args.in_file))
    print(f"#conv: {len(convs)}")

    # Read blocked words
    if os.path.exists(BLOCKED_WORDS_FILENAME):
        blocked_words = json.load(open(BLOCKED_WORDS_FILENAME))

    # Count frequency
    for conv in convs:
        for key in ["conversation_a", "conversation_b"]:
            messages = [row["content"] for row in conv[key] if row["role"] == "user"]
            for msg in messages:
                if not isinstance(msg, str):
                    continue
                msg = msg.lower().strip()
                frequency[msg] += 1

    keys = list(frequency.keys())
    keys.sort(key=lambda x: -frequency[x])
    frequent_prompts = keys[:10]
    frequent_prompts = set(frequent_prompts)
    frequent_prompts.add("")

    # Start filter
    ct_bad_format = 0
    ct_anonymized = 0
    ct_redacted = 0
    ct_error = 0
    ct_lang_filter = 0
    ct_flagged = 0
    ct_blocked_word = 0
    ct_blocked_model = 0
    ct_too_short = 0
    ct_too_frequent = 0

    new_convs = []
    for conv in tqdm(convs):
        type_code = detect_type(conv)

        if type_code == TypeCode.BAD_FORMAT:
            ct_bad_format += 1
            continue

        if type_code == TypeCode.ANONYMIZED:
            ct_anonymized += 1
            continue
        elif type_code == TypeCode.REDACTED:
            ct_redacted += 1
            continue
        elif type_code == TypeCode.BLOCKED_WORD:
            ct_blocked_word += 1
            continue
        elif type_code == TypeCode.BLOCKED_MODEL:
            ct_blocked_model += 1
            continue
        elif type_code == TypeCode.TOO_SHORT:
            ct_too_short += 1
            continue
        elif type_code == TypeCode.TOO_FREQUENT:
            ct_too_frequent += 1
            continue

        if conv["openai_moderation"]["flagged"]:
            ct_flagged += 1
            continue

        if type_code in [TypeCode.CORRECT]:
            new_convs.append(conv)

    if args.sample:
        # random.seed(0)
        # random.shuffle(new_convs)
        new_convs = new_convs[: args.sample]

    print(f"ct_anonymized: {ct_anonymized}, ct_redacted: {ct_redacted}")
    print(f"ct_bad_format: {ct_bad_format}, ct_flagged: {ct_flagged}")
    print(f"ct_blocked_word: {ct_blocked_word}, ct_blocked_model: {ct_blocked_model}")
    print(f"ct_too_short: {ct_too_short}, ct_too_frequent: {ct_too_frequent}")
    print(f"new_conv: {len(new_convs)}")

    out_file = args.in_file.replace(".json", ".out.json")
    print(f"Output to {out_file}")
    with open(out_file, "w") as fout:
        json.dump(new_convs, fout, indent=2, ensure_ascii=False)
monitor/dataset_release_scripts/arena_33k/merge_field.py
ADDED
@@ -0,0 +1,25 @@
"""Merge a field (e.g., a toxic_chat tag) from a tag file into a conversation file."""

import argparse
import json


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str)
    parser.add_argument("--tag-file", type=str)
    args = parser.parse_args()

    # build index
    objs = json.load(open(args.tag_file))
    new_field_dict = {}
    for obj in objs:
        new_field_dict[obj["question_id"]] = obj["toxic_chat"]

    objs = json.load(open(args.input))
    for obj in objs:
        obj["toxic_chat_tag"] = new_field_dict[obj["question_id"]]

    output = args.input.replace(".json", "_added.json")
    with open(output, "w") as fout:
        json.dump(objs, fout, indent=2, ensure_ascii=False)
monitor/dataset_release_scripts/arena_33k/sample.py
ADDED
@@ -0,0 +1,32 @@
"""
Sample a subset of conversations from a battle log file.

Usage:
python3 sample.py --input in.json --number 1000
"""

import argparse
import json
import random

K = 1000

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str)
    parser.add_argument("--number", type=int, nargs="+")
    args = parser.parse_args()

    convs = json.load(open(args.input))
    random.seed(0)
    random.shuffle(convs)

    for number in args.number:
        new_convs = convs[:number]

        output = args.input.replace(".json", f"_{number//K}k.json")
        with open(output, "w") as fout:
            json.dump(new_convs, fout, indent=2, ensure_ascii=False)

        print(f"#in: {len(convs)}, #out: {len(new_convs)}")
        print(f"Write to file: {output}")
monitor/dataset_release_scripts/arena_33k/upload_hf_dataset.py
ADDED
@@ -0,0 +1,9 @@
"""
Upload to huggingface.
"""
import json
from datasets import Dataset, DatasetDict, load_dataset

objs = json.load(open("clean_battle_conv_20230630_tagged_v3_pii_33k_added.json"))
data = Dataset.from_list(objs)
data.push_to_hub("lmsys/chatbot_arena_conversations", private=True)
monitor/dataset_release_scripts/lmsys_chat_1m/approve_all.py
ADDED
@@ -0,0 +1,13 @@
import requests

headers = {"authorization": "Bearer hf_XXX"}

url = "https://huggingface.co/api/datasets/lmsys/lmsys-chat-1m/user-access-request/pending"
a = requests.get(url, headers=headers)

for u in a.json():
    user = u["user"]["user"]
    url = "https://huggingface.co/api/datasets/lmsys/lmsys-chat-1m/user-access-request/grant"
    ret = requests.post(url, headers=headers, json={"user": user})
    print(user, ret.status_code)
    assert ret.status_code == 200
monitor/dataset_release_scripts/lmsys_chat_1m/compute_stats.py
ADDED
@@ -0,0 +1,119 @@
"""
From colab:
https://colab.research.google.com/drive/1oMdw_Lqgmd6DletSOLHsyD-Rc96cRShs?usp=sharing
"""
import argparse
import datetime
import json
import os
from pytz import timezone
import time

import kaleido
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from tqdm import tqdm

import plotly.io as pio

pio.kaleido.scope.mathjax = None

parser = argparse.ArgumentParser()
parser.add_argument("--in-file", type=str, required=True)
parser.add_argument("--scale", type=int, required=True)
args = parser.parse_args()

filename = args.in_file
scale = args.scale
convs = json.load(open(filename))
df = pd.DataFrame(convs)
df

print(f"#ips: {df['user_id'].nunique() * scale}")
print(f"#models: {df['model'].nunique()}")
print(f"#language: {df['language'].nunique()}")
print(f"#turns: {df['turn'].mean()}")

model_counts = df["model"].value_counts() * scale
# print("model counts", model_counts)
fig = px.bar(x=model_counts.index, y=model_counts)
fig.update_layout(
    xaxis_title=None,
    yaxis_title="Count",
    height=200,
    width=950,
    margin=dict(l=0, r=0, t=0, b=0),
)
fig.show()
fig.write_image("model_count.pdf")


model_counts = df["language"].value_counts().head(25) * scale
fig = px.bar(x=model_counts.index, y=model_counts)
fig.update_layout(
    xaxis_title=None,
    yaxis_title="Count",
    height=200,
    width=950,
    margin=dict(l=0, r=0, t=0, b=0),
)
fig.show()
fig.write_image("language_count.pdf")

chat_dates = [
    datetime.datetime.fromtimestamp(x, tz=timezone("US/Pacific")).strftime("%Y-%m-%d")
    for x in df["tstamp"]
]


def to_remove(x):
    for d in ["08-09", "08-08", "08-07", "08-06", "08-05", "08-04"]:
        if d in x:
            return True
    return False


chat_dates = [x for x in chat_dates if not to_remove(x)]

chat_dates_counts = pd.value_counts(chat_dates) * scale
print(f"mean #chat per day: {np.mean(chat_dates_counts):.2f}")

fig = px.bar(x=chat_dates_counts.index, y=chat_dates_counts)
fig.update_layout(
    xaxis_title="Dates",
    yaxis_title="Count",
    height=200,
    width=950,
    margin=dict(l=0, r=0, t=0, b=0),
)
fig.show()
fig.write_image("daily_conversation_count.pdf")

import transformers

tokenizer = transformers.AutoTokenizer.from_pretrained(
    "lmsys/vicuna-7b-v1.5", use_fast=False
)

prompts = []
responses = []
for conv in df["conversation"]:
    for row in conv:
        if row["role"] == "user":
            prompts.append(row["content"])
        else:
            responses.append(row["content"])

print(f"#prompts: {len(prompts)}")
print(f"#responses: {len(responses)}")


prompt_lens = [len(tokenizer(x).input_ids) for x in tqdm(prompts)]
print()
print(f"mean prompt len: {np.mean(prompt_lens):.2f}")

response_lens = [len(tokenizer(x).input_ids) if x else 0 for x in tqdm(responses)]
print()
print(f"mean response len: {np.mean(response_lens):.2f}")
monitor/dataset_release_scripts/lmsys_chat_1m/filter_bad_conv.py
ADDED
@@ -0,0 +1,148 @@
"""
Filter conversations for release.

Dependency:
pip install opencc-python-reimplemented

Usage:
python3 filter_bad_conv_lmsys_chat_1m.py --in clean_battle_conv_20230630_tagged_v1_pii.json
"""
import argparse
from concurrent.futures import ProcessPoolExecutor
from collections import defaultdict
from enum import Enum, auto
import json
import os
import random

from tqdm import tqdm
import opencc

BLOCKED_WORDS_FILENAME = "blocked_words.json"
blocked_words = []
frequency = defaultdict(lambda: 0)

cc_converter = opencc.OpenCC("t2s")


class TypeCode(Enum):
    CORRECT = auto()
    ANONYMIZED = auto()
    REDACTED = auto()
    BAD_FORMAT = auto()
    BLOCKED_WORD = auto()
    BLOCKED_MODEL = auto()
    TOO_SHORT = auto()
    TOO_FREQUENT = auto()


def detect_type(conv):
    for key in ["conversation_a", "conversation_b", "conversation"]:
        if key not in conv:
            continue

        messages = [row["content"] for row in conv[key]]
        for msg in messages:
            if not isinstance(msg, str):
                return TypeCode.BAD_FORMAT

        if len(messages) == 0:
            return TypeCode.BAD_FORMAT

        user_prompts = [
            row["content"].lower().strip() for row in conv[key] if row["role"] == "user"
        ]

        for msg in messages:
            msg = cc_converter.convert(msg.lower())
            if "<anonymized>" in msg:
                return TypeCode.ANONYMIZED
            if "<redacted>" in msg:
                return TypeCode.REDACTED

            for w in blocked_words:
                if w in msg:
                    return TypeCode.BLOCKED_WORD

    return TypeCode.CORRECT


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--in-file", type=str, required=True)
    parser.add_argument("--sample", type=int)
    args = parser.parse_args()

    # Read conversations
    convs = json.load(open(args.in_file))
    print(f"#conv: {len(convs)}")

    # Read blocked words
    if os.path.exists(BLOCKED_WORDS_FILENAME):
        blocked_words = json.load(open(BLOCKED_WORDS_FILENAME))
        blocked_words = [cc_converter.convert(w) for w in blocked_words]

    # Start filter
    ct_bad_format = 0
    ct_anonymized = 0
    ct_redacted = 0
    ct_error = 0
    ct_lang_filter = 0
    ct_flagged = 0
    ct_blocked_word = 0
    ct_blocked_model = 0
    ct_too_short = 0
    ct_too_frequent = 0

    type_codes = []
    with ProcessPoolExecutor() as executor:
        for result in tqdm(executor.map(detect_type, convs), total=len(convs)):
            type_codes.append(result)

    new_convs = []
    for conv, type_code in zip(convs, type_codes):
        if type_code == TypeCode.BAD_FORMAT:
            ct_bad_format += 1
            continue

        if type_code == TypeCode.ANONYMIZED:
            ct_anonymized += 1
            continue
        elif type_code == TypeCode.REDACTED:
            ct_redacted += 1
            continue
        elif type_code == TypeCode.BLOCKED_WORD:
            ct_blocked_word += 1
            continue
        elif type_code == TypeCode.BLOCKED_MODEL:
            ct_blocked_model += 1
            continue
        elif type_code == TypeCode.TOO_SHORT:
            ct_too_short += 1
            continue
        elif type_code == TypeCode.TOO_FREQUENT:
            ct_too_frequent += 1
            continue

        if "openai_moderation" in conv and conv["openai_moderation"]["flagged"]:
            ct_flagged += 1
            continue

        if type_code in [TypeCode.CORRECT]:
            new_convs.append(conv)

    if args.sample:
        random.seed(42)
        random.shuffle(new_convs)
        new_convs = new_convs[: args.sample]

    print(f"ct_anonymized: {ct_anonymized}, ct_redacted: {ct_redacted}")
    print(f"ct_bad_format: {ct_bad_format}, ct_flagged: {ct_flagged}")
    print(f"ct_blocked_word: {ct_blocked_word}, ct_blocked_model: {ct_blocked_model}")
    print(f"ct_too_short: {ct_too_short}, ct_too_frequent: {ct_too_frequent}")
    print(f"new_conv: {len(new_convs)}")

    out_file = args.in_file.replace(".json", ".s1.json")
    print(f"Output to {out_file}")
    with open(out_file, "w") as fout:
        json.dump(new_convs, fout, indent=2, ensure_ascii=False)
monitor/dataset_release_scripts/lmsys_chat_1m/final_post_processing.py
ADDED
@@ -0,0 +1,27 @@
import argparse
import json

from tqdm import tqdm
import numpy as np


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--in-file", type=str, required=True)
    args = parser.parse_args()

    # Read conversations
    convs = json.load(open(args.in_file))
    print(f"#conv: {len(convs)}")

    # Delete some fields
    for c in convs:
        del c["tstamp"]
        del c["user_id"]

    # Write
    print(f"#out conv: {len(convs)}")
    out_file = args.in_file.replace(".json", ".s2.json")
    print(f"Output to {out_file}")
    with open(out_file, "w") as fout:
        json.dump(convs, fout, indent=2, ensure_ascii=False)
monitor/dataset_release_scripts/lmsys_chat_1m/instructions.md
ADDED
@@ -0,0 +1,23 @@
```
export BASE=clean_conv_20230809_100k_pii
export SCALE=10

# filter words
python3 filter_bad_conv.py --in $BASE.json

# Clean up some fields (e.g., timestamps)
python3 final_post_processing.py --in $BASE.s1.json

# upload to hf
python3 upload_hf_dataset.py --in $BASE.s1.s2.json

# Make another version with openai moderation tag
python3 merge_oai_tag.py --in $BASE.s1.s2.json

# Make visualizations
python3 compute_stats.py --in $BASE.s1.json --scale $SCALE

# Copy figures
scp "atlas:/data/lmzheng/FastChat/fastchat/serve/monitor/dataset_release_scripts/lmsys_chat_1m/*.pdf" .
```
monitor/dataset_release_scripts/lmsys_chat_1m/merge_oai_tag.py
ADDED
@@ -0,0 +1,45 @@
import argparse
import json
import time

from tqdm import tqdm


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--in-file", type=str, required=True)
    parser.add_argument("--sample", type=int)
    args = parser.parse_args()

    tag_file = "clean_conv_20230809_1.5M_oai_filter_v2.json"
    # tag_file = "clean_conv_20230809_1.5M_oai_filter_v2_100k.json"
    in_file = args.in_file
    tic = time.time()

    # Load tags
    print("Load tags...")
    tag_data = json.load(open(tag_file))
    tag_dict = {}
    for c in tqdm(tag_data):
        tag_dict[c["conversation_id"]] = [x["oai_filter"] for x in c["conversation"]]
    print(f"elapsed: {time.time() - tic:.2f} s")

    # Append to input_file
    print("Load inputs...")
    input_data = json.load(open(in_file))
    for c in tqdm(input_data):
        cid = c["conversation_id"]
        if cid in tag_dict:
            c["openai_moderation"] = tag_dict[cid]
        else:
            print(f"missing tag for conv {cid}")
            exit()
    print(f"elapsed: {time.time() - tic:.2f} s")

    # Write output
    print("Write outputs...")
    out_file = in_file.replace(".json", ".with_tag.json")
    print(f"Output to {out_file}")
    with open(out_file, "w") as fout:
        json.dump(input_data, fout, indent=2, ensure_ascii=False)
    print(f"elapsed: {time.time() - tic:.2f} s")
monitor/dataset_release_scripts/lmsys_chat_1m/process_all.sh
ADDED
@@ -0,0 +1,18 @@
export BASE=clean_conv_20230809_1.5M_pii
#export BASE=clean_conv_20230809_100k_pii
export SCALE=1

# Filter words
python3 filter_bad_conv.py --in $BASE.json --sample 1000000

# Clean up some fields (e.g., timestamps)
python3 final_post_processing.py --in $BASE.s1.json

# Upload to hf
python3 upload_hf_dataset.py --in $BASE.s1.s2.json

# Make another version with openai moderation tag
python3 merge_oai_tag.py --in $BASE.s1.s2.json

# Make visualizations
python3 compute_stats.py --in $BASE.s1.json --scale $SCALE
monitor/dataset_release_scripts/lmsys_chat_1m/sample.py
ADDED
@@ -0,0 +1,32 @@
"""
Sample a subset of conversations from a battle log file.

Usage:
python3 sample.py --input in.json --number 1000
"""

import argparse
import json
import random

K = 1000

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str)
    parser.add_argument("--number", type=int, nargs="+")
    args = parser.parse_args()

    convs = json.load(open(args.input))
    random.seed(42)
    random.shuffle(convs)

    for number in args.number:
        new_convs = convs[:number]

        output = args.input.replace(".json", f"_{number//K}k.json")
        with open(output, "w") as fout:
            json.dump(new_convs, fout, indent=2, ensure_ascii=False)

        print(f"#in: {len(convs)}, #out: {len(new_convs)}")
        print(f"Write to file: {output}")
monitor/dataset_release_scripts/lmsys_chat_1m/upload_hf_dataset.py
ADDED
@@ -0,0 +1,17 @@
"""
Upload to huggingface.
"""
import argparse
import json
from datasets import Dataset, DatasetDict, load_dataset


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--in-file", type=str, required=True)
    args = parser.parse_args()

    objs = json.load(open(args.in_file))
    print(f"#convs: {len(objs)}")
    data = Dataset.from_list(objs)
    data.push_to_hub("lmsys/lmsys-chat-1m", private=True)
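Once pushed, the dataset can be pulled back with the standard `datasets` API; a minimal sketch, assuming access to the gated repo has already been granted:

```python
# Minimal sketch: pull the uploaded dataset back down for local analysis.
# Assumes the reader has been granted access to the gated lmsys/lmsys-chat-1m repo.
from datasets import load_dataset

ds = load_dataset("lmsys/lmsys-chat-1m", split="train")
print(ds)                       # feature summary and row count
print(ds[0]["conversation"])    # first conversation, OpenAI-style messages
```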
monitor/elo_analysis.py
ADDED
@@ -0,0 +1,303 @@
1 |
+
import argparse
|
2 |
+
from collections import defaultdict
|
3 |
+
import datetime
|
4 |
+
import json
|
5 |
+
import math
|
6 |
+
import pickle
|
7 |
+
from pytz import timezone
|
8 |
+
|
9 |
+
import numpy as np
|
10 |
+
import pandas as pd
|
11 |
+
import plotly.express as px
|
12 |
+
from tqdm import tqdm
|
13 |
+
|
14 |
+
from fastchat.model.model_registry import get_model_info
|
15 |
+
from fastchat.serve.monitor.basic_stats import get_log_files
|
16 |
+
from fastchat.serve.monitor.clean_battle_data import clean_battle_data
|
17 |
+
|
18 |
+
|
19 |
+
pd.options.display.float_format = "{:.2f}".format
|
20 |
+
|
21 |
+
|
22 |
+
def compute_elo(battles, K=4, SCALE=400, BASE=10, INIT_RATING=1000):
|
23 |
+
rating = defaultdict(lambda: INIT_RATING)
|
24 |
+
|
25 |
+
for rd, model_a, model_b, winner in battles[
|
26 |
+
["model_a", "model_b", "winner"]
|
27 |
+
].itertuples():
|
28 |
+
ra = rating[model_a]
|
29 |
+
rb = rating[model_b]
|
30 |
+
ea = 1 / (1 + BASE ** ((rb - ra) / SCALE))
|
31 |
+
eb = 1 / (1 + BASE ** ((ra - rb) / SCALE))
|
32 |
+
if winner == "model_a":
|
33 |
+
sa = 1
|
34 |
+
elif winner == "model_b":
|
35 |
+
sa = 0
|
36 |
+
elif winner == "tie" or winner == "tie (bothbad)":
|
37 |
+
sa = 0.5
|
38 |
+
else:
|
39 |
+
raise Exception(f"unexpected vote {winner}")
|
40 |
+
rating[model_a] += K * (sa - ea)
|
41 |
+
rating[model_b] += K * (1 - sa - eb)
|
42 |
+
|
43 |
+
return dict(rating)
|
44 |
+
|
45 |
+
|
46 |
+
def get_bootstrap_result(battles, func_compute_elo, num_round=1000):
|
47 |
+
rows = []
|
48 |
+
for i in tqdm(range(num_round), desc="bootstrap"):
|
49 |
+
tmp_battles = battles.sample(frac=1.0, replace=True)
|
50 |
+
rows.append(func_compute_elo(tmp_battles))
|
51 |
+
df = pd.DataFrame(rows)
|
52 |
+
return df[df.median().sort_values(ascending=False).index]
|
53 |
+
|
54 |
+
|
55 |
+
def get_median_elo_from_bootstrap(bootstrap_df):
|
56 |
+
median = dict(bootstrap_df.quantile(0.5))
|
57 |
+
median = {k: int(v + 0.5) for k, v in median.items()}
|
58 |
+
return median
|
59 |
+
|
60 |
+
|
61 |
+
def compute_pairwise_win_fraction(battles, model_order, limit_show_number=None):
|
62 |
+
# Times each model wins as Model A
|
63 |
+
a_win_ptbl = pd.pivot_table(
|
64 |
+
battles[battles["winner"] == "model_a"],
|
65 |
+
index="model_a",
|
66 |
+
columns="model_b",
|
67 |
+
aggfunc="size",
|
68 |
+
fill_value=0,
|
69 |
+
)
|
70 |
+
|
71 |
+
# Table counting times each model wins as Model B
|
72 |
+
b_win_ptbl = pd.pivot_table(
|
73 |
+
battles[battles["winner"] == "model_b"],
|
74 |
+
index="model_a",
|
75 |
+
columns="model_b",
|
76 |
+
aggfunc="size",
|
77 |
+
fill_value=0,
|
78 |
+
)
|
79 |
+
|
80 |
+
# Table counting number of A-B pairs
|
81 |
+
num_battles_ptbl = pd.pivot_table(
|
82 |
+
battles, index="model_a", columns="model_b", aggfunc="size", fill_value=0
|
83 |
+
)
|
84 |
+
|
85 |
+
# Computing the proportion of wins for each model as A and as B
|
86 |
+
# against all other models
|
87 |
+
row_beats_col_freq = (a_win_ptbl + b_win_ptbl.T) / (
|
88 |
+
num_battles_ptbl + num_battles_ptbl.T
|
89 |
+
)
|
90 |
+
|
91 |
+
if model_order is None:
|
92 |
+
prop_wins = row_beats_col_freq.mean(axis=1).sort_values(ascending=False)
|
93 |
+
model_order = list(prop_wins.keys())
|
94 |
+
|
95 |
+
if limit_show_number is not None:
|
96 |
+
model_order = model_order[:limit_show_number]
|
97 |
+
|
98 |
+
# Arrange ordering according to proprition of wins
|
99 |
+
row_beats_col = row_beats_col_freq.loc[model_order, model_order]
|
100 |
+
return row_beats_col
|
101 |
+
|
102 |
+
|
103 |
+
def visualize_leaderboard_table(rating):
|
104 |
+
models = list(rating.keys())
|
105 |
+
models.sort(key=lambda k: -rating[k])
|
106 |
+
|
107 |
+
emoji_dict = {
|
108 |
+
1: "🥇",
|
109 |
+
2: "🥈",
|
110 |
+
3: "🥉",
|
111 |
+
}
|
112 |
+
|
113 |
+
md = ""
|
114 |
+
md += "| Rank | Model | Elo Rating | Description |\n"
|
115 |
+
md += "| --- | --- | --- | --- |\n"
|
116 |
+
for i, model in enumerate(models):
|
117 |
+
rank = i + 1
|
118 |
+
minfo = get_model_info(model)
|
119 |
+
emoji = emoji_dict.get(rank, "")
|
120 |
+
md += f"| {rank} | {emoji} [{model}]({minfo.link}) | {rating[model]:.0f} | {minfo.description} |\n"
|
121 |
+
|
122 |
+
return md
|
123 |
+
|
124 |
+
|
125 |
+
def visualize_pairwise_win_fraction(battles, model_order):
|
126 |
+
row_beats_col = compute_pairwise_win_fraction(battles, model_order)
|
127 |
+
fig = px.imshow(
|
128 |
+
row_beats_col,
|
129 |
+
color_continuous_scale="RdBu",
|
130 |
+
text_auto=".2f",
|
131 |
+
height=700,
|
132 |
+
width=700,
|
133 |
+
)
|
134 |
+
fig.update_layout(
|
135 |
+
xaxis_title="Model B",
|
136 |
+
yaxis_title="Model A",
|
137 |
+
xaxis_side="top",
|
138 |
+
title_y=0.07,
|
139 |
+
title_x=0.5,
|
140 |
+
)
|
141 |
+
fig.update_traces(
|
142 |
+
hovertemplate="Model A: %{y}<br>Model B: %{x}<br>Fraction of A Wins: %{z}<extra></extra>"
|
143 |
+
)
|
144 |
+
|
145 |
+
return fig
|
146 |
+
|
147 |
+
|
148 |
+
def visualize_battle_count(battles, model_order):
|
149 |
+
ptbl = pd.pivot_table(
|
150 |
+
battles, index="model_a", columns="model_b", aggfunc="size", fill_value=0
|
151 |
+
)
|
152 |
+
battle_counts = ptbl + ptbl.T
|
153 |
+
fig = px.imshow(
|
154 |
+
battle_counts.loc[model_order, model_order],
|
155 |
+
text_auto=True,
|
156 |
+
height=700,
|
157 |
+
width=700,
|
158 |
+
)
|
159 |
+
fig.update_layout(
|
160 |
+
xaxis_title="Model B",
|
161 |
+
yaxis_title="Model A",
|
162 |
+
xaxis_side="top",
|
163 |
+
title_y=0.07,
|
164 |
+
title_x=0.5,
|
165 |
+
)
|
166 |
+
fig.update_traces(
|
167 |
+
hovertemplate="Model A: %{y}<br>Model B: %{x}<br>Count: %{z}<extra></extra>"
|
168 |
+
)
|
169 |
+
return fig
|
170 |
+
|
171 |
+
|
172 |
+
def visualize_average_win_rate(battles, limit_show_number):
|
173 |
+
row_beats_col_freq = compute_pairwise_win_fraction(
|
174 |
+
battles, None, limit_show_number=limit_show_number
|
175 |
+
)
|
176 |
+
fig = px.bar(
|
177 |
+
row_beats_col_freq.mean(axis=1).sort_values(ascending=False),
|
178 |
+
text_auto=".2f",
|
179 |
+
height=500,
|
180 |
+
width=700,
|
181 |
+
)
|
182 |
+
fig.update_layout(
|
183 |
+
yaxis_title="Average Win Rate", xaxis_title="Model", showlegend=False
|
184 |
+
)
|
185 |
+
return fig
|
186 |
+
|
187 |
+
|
188 |
+
def visualize_bootstrap_elo_rating(df, limit_show_number):
|
189 |
+
bars = (
|
190 |
+
pd.DataFrame(
|
191 |
+
dict(
|
192 |
+
lower=df.quantile(0.025),
|
193 |
+
rating=df.quantile(0.5),
|
194 |
+
upper=df.quantile(0.975),
|
195 |
+
)
|
196 |
+
)
|
197 |
+
.reset_index(names="model")
|
198 |
+
.sort_values("rating", ascending=False)
|
199 |
+
)
|
200 |
+
bars = bars[:limit_show_number]
|
201 |
+
bars["error_y"] = bars["upper"] - bars["rating"]
|
202 |
+
bars["error_y_minus"] = bars["rating"] - bars["lower"]
|
203 |
+
bars["rating_rounded"] = np.round(bars["rating"], 2)
|
204 |
+
fig = px.scatter(
|
205 |
+
bars,
|
206 |
+
x="model",
|
207 |
+
y="rating",
|
208 |
+
error_y="error_y",
|
209 |
+
error_y_minus="error_y_minus",
|
210 |
+
text="rating_rounded",
|
211 |
+
height=500,
|
212 |
+
width=700,
|
213 |
+
)
|
214 |
+
fig.update_layout(xaxis_title="Model", yaxis_title="Rating")
|
215 |
+
return fig
|
216 |
+
|
217 |
+
|
218 |
+
def report_elo_analysis_results(battles_json):
|
219 |
+
battles = pd.DataFrame(battles_json)
|
220 |
+
battles = battles.sort_values(ascending=True, by=["tstamp"])
|
221 |
+
# Only use anonymous votes
|
222 |
+
battles = battles[battles["anony"]].reset_index(drop=True)
|
223 |
+
battles_no_ties = battles[~battles["winner"].str.contains("tie")]
|
224 |
+
|
225 |
+
# Online update
|
226 |
+
elo_rating_online = compute_elo(battles)
|
227 |
+
|
228 |
+
# Bootstrap
|
229 |
+
bootstrap_df = get_bootstrap_result(battles, compute_elo)
|
230 |
+
elo_rating_median = get_median_elo_from_bootstrap(bootstrap_df)
|
231 |
+
model_order = list(elo_rating_median.keys())
|
232 |
+
model_order.sort(key=lambda k: -elo_rating_median[k])
|
233 |
+
|
234 |
+
limit_show_number = 25 # limit show number to make plots smaller
|
235 |
+
model_order = model_order[:limit_show_number]
|
236 |
+
|
237 |
+
# Plots
|
238 |
+
leaderboard_table = visualize_leaderboard_table(elo_rating_median)
|
239 |
+
win_fraction_heatmap = visualize_pairwise_win_fraction(battles_no_ties, model_order)
|
240 |
+
battle_count_heatmap = visualize_battle_count(battles_no_ties, model_order)
|
241 |
+
average_win_rate_bar = visualize_average_win_rate(
|
242 |
+
battles_no_ties, limit_show_number
|
243 |
+
)
|
244 |
+
bootstrap_elo_rating = visualize_bootstrap_elo_rating(
|
245 |
+
bootstrap_df, limit_show_number
|
246 |
+
)
|
247 |
+
|
248 |
+
last_updated_tstamp = battles["tstamp"].max()
|
249 |
+
last_updated_datetime = datetime.datetime.fromtimestamp(
|
250 |
+
last_updated_tstamp, tz=timezone("US/Pacific")
|
251 |
+
).strftime("%Y-%m-%d %H:%M:%S %Z")
|
252 |
+
|
253 |
+
return {
|
254 |
+
"elo_rating_online": elo_rating_online,
|
255 |
+
"elo_rating_median": elo_rating_median,
|
256 |
+
"leaderboard_table": leaderboard_table,
|
257 |
+
"win_fraction_heatmap": win_fraction_heatmap,
|
258 |
+
"battle_count_heatmap": battle_count_heatmap,
|
259 |
+
"average_win_rate_bar": average_win_rate_bar,
|
260 |
+
"bootstrap_elo_rating": bootstrap_elo_rating,
|
261 |
+
"last_updated_datetime": last_updated_datetime,
|
262 |
+
"last_updated_tstamp": last_updated_tstamp,
|
263 |
+
}
|
264 |
+
|
265 |
+
|
266 |
+
def pretty_print_elo_rating(rating):
|
267 |
+
model_order = list(rating.keys())
|
268 |
+
model_order.sort(key=lambda k: -rating[k])
|
269 |
+
for i, model in enumerate(model_order):
|
270 |
+
print(f"{i+1:2d}, {model:25s}, {rating[model]:.0f}")
|
271 |
+
|
272 |
+
|
273 |
+
if __name__ == "__main__":
|
274 |
+
parser = argparse.ArgumentParser()
|
275 |
+
parser.add_argument("--clean-battle-file", type=str)
|
276 |
+
parser.add_argument("--max-num-files", type=int)
|
277 |
+
args = parser.parse_args()
|
278 |
+
|
279 |
+
np.random.seed(42)
|
280 |
+
|
281 |
+
if args.clean_battle_file:
|
282 |
+
# Read data from a cleaned battle files
|
283 |
+
battles = pd.read_json(args.clean_battle_file)
|
284 |
+
else:
|
285 |
+
# Read data from all log files
|
286 |
+
log_files = get_log_files(args.max_num_files)
|
287 |
+
battles = clean_battle_data(log_files)
|
288 |
+
|
289 |
+
results = report_elo_analysis_results(battles)
|
290 |
+
|
291 |
+
print("# Online")
|
292 |
+
pretty_print_elo_rating(results["elo_rating_online"])
|
293 |
+
print("# Median")
|
294 |
+
pretty_print_elo_rating(results["elo_rating_median"])
|
295 |
+
print(f"last update : {results['last_updated_datetime']}")
|
296 |
+
|
297 |
+
last_updated_tstamp = results["last_updated_tstamp"]
|
298 |
+
cutoff_date = datetime.datetime.fromtimestamp(
|
299 |
+
last_updated_tstamp, tz=timezone("US/Pacific")
|
300 |
+
).strftime("%Y%m%d")
|
301 |
+
|
302 |
+
with open(f"elo_results_{cutoff_date}.pkl", "wb") as fout:
|
303 |
+
pickle.dump(results, fout)
|
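The `compute_elo` routine above is a plain linear-update Elo over the battle rows; a toy sanity check on a hand-made battles table might look like this (assuming FastChat is installed so the module path resolves):

```python
# Toy sanity check for the linear-update Elo in elo_analysis.py (not part of the release scripts).
import pandas as pd
from fastchat.serve.monitor.elo_analysis import compute_elo

battles = pd.DataFrame(
    [
        {"model_a": "vicuna-13b", "model_b": "alpaca-13b", "winner": "model_a"},
        {"model_a": "alpaca-13b", "model_b": "vicuna-13b", "winner": "model_b"},
        {"model_a": "vicuna-13b", "model_b": "alpaca-13b", "winner": "tie"},
    ]
)
# With K=4 and INIT_RATING=1000, vicuna-13b should end slightly above 1000.
print(compute_elo(battles))
```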
monitor/inspect_conv.py
ADDED
@@ -0,0 +1,87 @@
import argparse
import code
import datetime
import json
import os
from pytz import timezone
import time

import pandas as pd
from tqdm import tqdm


def get_log_files(max_num_files=None):
    dates = []
    for month in [4, 5]:
        for day in range(1, 32):
            dates.append(f"2023-{month:02d}-{day:02d}")

    num_servers = 14
    filenames = []
    for d in dates:
        for i in range(num_servers):
            name = os.path.expanduser(f"~/fastchat_logs/server{i}/{d}-conv.json")
            if os.path.exists(name):
                filenames.append(name)
    max_num_files = max_num_files or len(filenames)
    filenames = filenames[-max_num_files:]
    return filenames


def pretty_print_conversation(messages):
    for role, msg in messages:
        print(f"[[{role}]]: {msg}")


def inspect_convs(log_files):
    data = []
    for filename in tqdm(log_files, desc="read files"):
        for retry in range(5):
            try:
                lines = open(filename).readlines()
                break
            except FileNotFoundError:
                time.sleep(2)

        for l in lines:
            row = json.loads(l)

            if "states" not in row:
                continue
            if row["type"] not in ["leftvote", "rightvote", "bothbad_vote"]:
                continue

            model_names = row["states"][0]["model_name"], row["states"][1]["model_name"]
            if row["type"] == "leftvote":
                winner, loser = model_names[0], model_names[1]
                winner_conv, loser_conv = row["states"][0], row["states"][1]
            elif row["type"] == "rightvote":
                loser, winner = model_names[0], model_names[1]
                loser_conv, winner_conv = row["states"][0], row["states"][1]

            if loser == "bard" and winner == "vicuna-13b":
                print("=" * 20)
                print(f"Winner: {winner}")
                pretty_print_conversation(winner_conv["messages"])
                print(f"Loser: {loser}")
                pretty_print_conversation(loser_conv["messages"])
                print("=" * 20)
                input()

            # if row["type"] == "bothbad_vote" and "gpt-4" in model_names:
            #     print("=" * 20)
            #     print(f"Model A: {model_names[0]}")
            #     pretty_print_conversation(row["states"][0]["messages"])
            #     print(f"Model B: {model_names[1]}")
            #     pretty_print_conversation(row["states"][1]["messages"])
            #     print("=" * 20)
            #     input()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--max-num-files", type=int)
    args = parser.parse_args()

    log_files = get_log_files(args.max_num_files)
    inspect_convs(log_files)
monitor/intersect_conv_file.py
ADDED
@@ -0,0 +1,25 @@
"""
Take the intersection of two conversation files.

Usage: python3 -m fastchat.data.merge --input input.json --conv-id conv_id_file.json --out intersect.json
"""

import argparse
import json


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str, required=True)
    parser.add_argument("--conv-id", type=str, required=True)
    parser.add_argument("--out-file", type=str, default="intersect.json")
    args = parser.parse_args()

    conv_id_objs = json.load(open(args.conv_id, "r"))
    conv_ids = set(x["conversation_id"] for x in conv_id_objs)

    objs = json.load(open(args.input, "r"))
    after_objs = [x for x in objs if x["conversation_id"] in conv_ids]

    print(f"#in: {len(objs)}, #out: {len(after_objs)}")
    json.dump(after_objs, open(args.out_file, "w"), indent=2, ensure_ascii=False)
monitor/leaderboard_csv_to_html.py
ADDED
@@ -0,0 +1,51 @@
"""
Convert a leaderboard csv file to html table used in the blog.

Usage:
python3 leaderboard_csv_to_html.py --in leaderboard_table_20230619.csv
"""
import argparse

import numpy as np

from fastchat.serve.monitor.monitor import load_leaderboard_table_csv


def model_hyperlink(model_name, link):
    return f'<a target="_blank" href="{link}"> {model_name} </a>'


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", type=str, required=True)
    args = parser.parse_args()

    data = load_leaderboard_table_csv(args.input, add_hyperlink=False)
    headers = [
        "Model",
        "MT-bench (score)",
        "Arena Elo rating",
        "MMLU",
        "License",
    ]
    values = []
    for item in data:
        row = []
        for key in headers:
            value = item[key]
            row.append(value)
        row[0] = model_hyperlink(item["Model"], item["Link"])
        values.append(row)
    values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9)

    for value in values:
        row = "<tr>"
        for x in value:
            try:
                if np.isnan(x):
                    x = "-"
            except TypeError:
                pass
            row += f" <td>{x}</td> "
        row += "</tr>"
        print(row)
monitor/monitor.py
ADDED
@@ -0,0 +1,313 @@
1 |
+
"""
|
2 |
+
Live monitor of the website statistics and leaderboard.
|
3 |
+
|
4 |
+
Dependency:
|
5 |
+
sudo apt install pkg-config libicu-dev
|
6 |
+
pip install pytz gradio gdown plotly polyglot pyicu pycld2 tabulate
|
7 |
+
"""
|
8 |
+
|
9 |
+
import argparse
|
10 |
+
import ast
|
11 |
+
import pickle
|
12 |
+
import os
|
13 |
+
import threading
|
14 |
+
import time
|
15 |
+
|
16 |
+
import gradio as gr
|
17 |
+
import numpy as np
|
18 |
+
|
19 |
+
from fastchat.serve.monitor.basic_stats import report_basic_stats, get_log_files
|
20 |
+
from fastchat.serve.monitor.clean_battle_data import clean_battle_data
|
21 |
+
from fastchat.serve.monitor.elo_analysis import report_elo_analysis_results
|
22 |
+
from fastchat.utils import build_logger, get_window_url_params_js
|
23 |
+
|
24 |
+
|
25 |
+
notebook_url = "https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927SGUdfr6nsR?usp=sharing"
|
26 |
+
|
27 |
+
|
28 |
+
basic_component_values = [None] * 6
|
29 |
+
leader_component_values = [None] * 5
|
30 |
+
|
31 |
+
|
32 |
+
def make_leaderboard_md(elo_results):
|
33 |
+
leaderboard_md = f"""
|
34 |
+
# 🏆 Chatbot Arena Leaderboard
|
35 |
+
| [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
|
36 |
+
|
37 |
+
This leaderboard is based on the following three benchmarks.
|
38 |
+
- [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 100K+ user votes to compute Elo ratings.
|
39 |
+
- [MT-Bench](https://arxiv.org/abs/2306.05685) - a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
|
40 |
+
- [MMLU](https://arxiv.org/abs/2009.03300) (5-shot) - a test to measure a model's multitask accuracy on 57 tasks.
|
41 |
+
|
42 |
+
💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are mostly computed by [InstructEval](https://github.com/declare-lab/instruct-eval). Higher values are better for all benchmarks. Empty cells mean not available. Last updated: November, 2023.
|
43 |
+
"""
|
44 |
+
return leaderboard_md
|
45 |
+
|
46 |
+
|
47 |
+
def make_leaderboard_md_live(elo_results):
|
48 |
+
leaderboard_md = f"""
|
49 |
+
# Leaderboard
|
50 |
+
Last updated: {elo_results["last_updated_datetime"]}
|
51 |
+
{elo_results["leaderboard_table"]}
|
52 |
+
"""
|
53 |
+
return leaderboard_md
|
54 |
+
|
55 |
+
|
56 |
+
def update_elo_components(max_num_files, elo_results_file):
|
57 |
+
log_files = get_log_files(max_num_files)
|
58 |
+
|
59 |
+
# Leaderboard
|
60 |
+
if elo_results_file is None: # Do live update
|
61 |
+
battles = clean_battle_data(log_files, [])
|
62 |
+
elo_results = report_elo_analysis_results(battles)
|
63 |
+
|
64 |
+
leader_component_values[0] = make_leaderboard_md_live(elo_results)
|
65 |
+
leader_component_values[1] = elo_results["win_fraction_heatmap"]
|
66 |
+
leader_component_values[2] = elo_results["battle_count_heatmap"]
|
67 |
+
leader_component_values[3] = elo_results["bootstrap_elo_rating"]
|
68 |
+
leader_component_values[4] = elo_results["average_win_rate_bar"]
|
69 |
+
|
70 |
+
# Basic stats
|
71 |
+
basic_stats = report_basic_stats(log_files)
|
72 |
+
md0 = f"Last updated: {basic_stats['last_updated_datetime']}"
|
73 |
+
|
74 |
+
md1 = "### Action Histogram\n"
|
75 |
+
md1 += basic_stats["action_hist_md"] + "\n"
|
76 |
+
|
77 |
+
md2 = "### Anony. Vote Histogram\n"
|
78 |
+
md2 += basic_stats["anony_vote_hist_md"] + "\n"
|
79 |
+
|
80 |
+
md3 = "### Model Call Histogram\n"
|
81 |
+
md3 += basic_stats["model_hist_md"] + "\n"
|
82 |
+
|
83 |
+
md4 = "### Model Call (Last 24 Hours)\n"
|
84 |
+
md4 += basic_stats["num_chats_last_24_hours"] + "\n"
|
85 |
+
|
86 |
+
basic_component_values[0] = md0
|
87 |
+
basic_component_values[1] = basic_stats["chat_dates_bar"]
|
88 |
+
basic_component_values[2] = md1
|
89 |
+
basic_component_values[3] = md2
|
90 |
+
basic_component_values[4] = md3
|
91 |
+
basic_component_values[5] = md4
|
92 |
+
|
93 |
+
|
94 |
+
def update_worker(max_num_files, interval, elo_results_file):
|
95 |
+
while True:
|
96 |
+
tic = time.time()
|
97 |
+
update_elo_components(max_num_files, elo_results_file)
|
98 |
+
durtaion = time.time() - tic
|
99 |
+
print(f"update duration: {durtaion:.2f} s")
|
100 |
+
time.sleep(max(interval - durtaion, 0))
|
101 |
+
|
102 |
+
|
103 |
+
def load_demo(url_params, request: gr.Request):
|
104 |
+
logger.info(f"load_demo. ip: {request.client.host}. params: {url_params}")
|
105 |
+
return basic_component_values + leader_component_values
|
106 |
+
|
107 |
+
|
108 |
+
def model_hyperlink(model_name, link):
|
109 |
+
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
110 |
+
|
111 |
+
|
112 |
+
def load_leaderboard_table_csv(filename, add_hyperlink=True):
|
113 |
+
lines = open(filename).readlines()
|
114 |
+
heads = [v.strip() for v in lines[0].split(",")]
|
115 |
+
rows = []
|
116 |
+
for i in range(1, len(lines)):
|
117 |
+
row = [v.strip() for v in lines[i].split(",")]
|
118 |
+
for j in range(len(heads)):
|
119 |
+
item = {}
|
120 |
+
for h, v in zip(heads, row):
|
121 |
+
if h == "Arena Elo rating":
|
122 |
+
if v != "-":
|
123 |
+
v = int(ast.literal_eval(v))
|
124 |
+
else:
|
125 |
+
v = np.nan
|
126 |
+
elif h == "MMLU":
|
127 |
+
if v != "-":
|
128 |
+
v = round(ast.literal_eval(v) * 100, 1)
|
129 |
+
else:
|
130 |
+
v = np.nan
|
131 |
+
elif h == "MT-bench (win rate %)":
|
132 |
+
if v != "-":
|
133 |
+
v = round(ast.literal_eval(v[:-1]), 1)
|
134 |
+
else:
|
135 |
+
v = np.nan
|
136 |
+
elif h == "MT-bench (score)":
|
137 |
+
if v != "-":
|
138 |
+
v = round(ast.literal_eval(v), 2)
|
139 |
+
else:
|
140 |
+
v = np.nan
|
141 |
+
item[h] = v
|
142 |
+
if add_hyperlink:
|
143 |
+
item["Model"] = model_hyperlink(item["Model"], item["Link"])
|
144 |
+
rows.append(item)
|
145 |
+
|
146 |
+
return rows
|
147 |
+
|
148 |
+
|
149 |
+
def build_basic_stats_tab():
|
150 |
+
empty = "Loading ..."
|
151 |
+
basic_component_values[:] = [empty, None, empty, empty, empty, empty]
|
152 |
+
|
153 |
+
md0 = gr.Markdown(empty)
|
154 |
+
gr.Markdown("#### Figure 1: Number of model calls and votes")
|
155 |
+
plot_1 = gr.Plot(show_label=False)
|
156 |
+
with gr.Row():
|
157 |
+
with gr.Column():
|
158 |
+
md1 = gr.Markdown(empty)
|
159 |
+
with gr.Column():
|
160 |
+
md2 = gr.Markdown(empty)
|
161 |
+
with gr.Row():
|
162 |
+
with gr.Column():
|
163 |
+
md3 = gr.Markdown(empty)
|
164 |
+
with gr.Column():
|
165 |
+
md4 = gr.Markdown(empty)
|
166 |
+
return [md0, plot_1, md1, md2, md3, md4]
|
167 |
+
|
168 |
+
|
169 |
+
def build_leaderboard_tab(elo_results_file, leaderboard_table_file):
|
170 |
+
if elo_results_file is None: # Do live update
|
171 |
+
md = "Loading ..."
|
172 |
+
p1 = p2 = p3 = p4 = None
|
173 |
+
else:
|
174 |
+
with open(elo_results_file, "rb") as fin:
|
175 |
+
elo_results = pickle.load(fin)
|
176 |
+
|
177 |
+
md = make_leaderboard_md(elo_results)
|
178 |
+
p1 = elo_results["win_fraction_heatmap"]
|
179 |
+
p2 = elo_results["battle_count_heatmap"]
|
180 |
+
p3 = elo_results["bootstrap_elo_rating"]
|
181 |
+
p4 = elo_results["average_win_rate_bar"]
|
182 |
+
|
183 |
+
md_1 = gr.Markdown(md, elem_id="leaderboard_markdown")
|
184 |
+
|
185 |
+
if leaderboard_table_file:
|
186 |
+
data = load_leaderboard_table_csv(leaderboard_table_file)
|
187 |
+
headers = [
|
188 |
+
"Model",
|
189 |
+
"Arena Elo rating",
|
190 |
+
"MT-bench (score)",
|
191 |
+
"MMLU",
|
192 |
+
"License",
|
193 |
+
]
|
194 |
+
values = []
|
195 |
+
for item in data:
|
196 |
+
row = []
|
197 |
+
for key in headers:
|
198 |
+
value = item[key]
|
199 |
+
row.append(value)
|
200 |
+
values.append(row)
|
201 |
+
values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9)
|
202 |
+
|
203 |
+
headers[1] = "⭐ " + headers[1]
|
204 |
+
headers[2] = "📈 " + headers[2]
|
205 |
+
|
206 |
+
gr.Dataframe(
|
207 |
+
headers=headers,
|
208 |
+
datatype=["markdown", "number", "number", "number", "str"],
|
209 |
+
value=values,
|
210 |
+
elem_id="leaderboard_dataframe",
|
211 |
+
)
|
212 |
+
gr.Markdown(
|
213 |
+
""" ## Visit our [HF space](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) for more analysis!
|
214 |
+
If you want to see more models, please help us [add them](https://github.com/lm-sys/FastChat/blob/main/docs/arena.md#how-to-add-a-new-model).
|
215 |
+
""",
|
216 |
+
elem_id="leaderboard_markdown",
|
217 |
+
)
|
218 |
+
else:
|
219 |
+
pass
|
220 |
+
|
221 |
+
leader_component_values[:] = [md, p1, p2, p3, p4]
|
222 |
+
|
223 |
+
"""
|
224 |
+
with gr.Row():
|
225 |
+
with gr.Column():
|
226 |
+
gr.Markdown(
|
227 |
+
"#### Figure 1: Fraction of Model A Wins for All Non-tied A vs. B Battles"
|
228 |
+
)
|
229 |
+
plot_1 = gr.Plot(p1, show_label=False)
|
230 |
+
with gr.Column():
|
231 |
+
gr.Markdown(
|
232 |
+
"#### Figure 2: Battle Count for Each Combination of Models (without Ties)"
|
233 |
+
)
|
234 |
+
plot_2 = gr.Plot(p2, show_label=False)
|
235 |
+
with gr.Row():
|
236 |
+
with gr.Column():
|
237 |
+
gr.Markdown(
|
238 |
+
"#### Figure 3: Bootstrap of Elo Estimates (1000 Rounds of Random Sampling)"
|
239 |
+
)
|
240 |
+
plot_3 = gr.Plot(p3, show_label=False)
|
241 |
+
with gr.Column():
|
242 |
+
gr.Markdown(
|
243 |
+
"#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
|
244 |
+
)
|
245 |
+
plot_4 = gr.Plot(p4, show_label=False)
|
246 |
+
"""
|
247 |
+
|
248 |
+
from fastchat.serve.gradio_web_server import acknowledgment_md
|
249 |
+
|
250 |
+
gr.Markdown(acknowledgment_md)
|
251 |
+
|
252 |
+
# return [md_1, plot_1, plot_2, plot_3, plot_4]
|
253 |
+
return [md_1]
|
254 |
+
|
255 |
+
|
256 |
+
def build_demo(elo_results_file, leaderboard_table_file):
|
257 |
+
from fastchat.serve.gradio_web_server import block_css
|
258 |
+
|
259 |
+
text_size = gr.themes.sizes.text_lg
|
260 |
+
|
261 |
+
with gr.Blocks(
|
262 |
+
title="Monitor",
|
263 |
+
theme=gr.themes.Base(text_size=text_size),
|
264 |
+
css=block_css,
|
265 |
+
) as demo:
|
266 |
+
with gr.Tabs() as tabs:
|
267 |
+
with gr.Tab("Leaderboard", id=0):
|
268 |
+
leader_components = build_leaderboard_tab(
|
269 |
+
elo_results_file, leaderboard_table_file
|
270 |
+
)
|
271 |
+
|
272 |
+
with gr.Tab("Basic Stats", id=1):
|
273 |
+
basic_components = build_basic_stats_tab()
|
274 |
+
|
275 |
+
url_params = gr.JSON(visible=False)
|
276 |
+
demo.load(
|
277 |
+
load_demo,
|
278 |
+
[url_params],
|
279 |
+
basic_components + leader_components,
|
280 |
+
_js=get_window_url_params_js,
|
281 |
+
)
|
282 |
+
|
283 |
+
return demo
|
284 |
+
|
285 |
+
|
286 |
+
if __name__ == "__main__":
|
287 |
+
parser = argparse.ArgumentParser()
|
288 |
+
parser.add_argument("--host", type=str, default="0.0.0.0")
|
289 |
+
parser.add_argument("--port", type=int)
|
290 |
+
parser.add_argument("--share", action="store_true")
|
291 |
+
parser.add_argument("--concurrency-count", type=int, default=10)
|
292 |
+
parser.add_argument("--update-interval", type=int, default=300)
|
293 |
+
parser.add_argument("--max-num-files", type=int)
|
294 |
+
parser.add_argument("--elo-results-file", type=str)
|
295 |
+
parser.add_argument("--leaderboard-table-file", type=str)
|
296 |
+
args = parser.parse_args()
|
297 |
+
|
298 |
+
logger = build_logger("monitor", "monitor.log")
|
299 |
+
logger.info(f"args: {args}")
|
300 |
+
|
301 |
+
if args.elo_results_file is None: # Do live update
|
302 |
+
update_thread = threading.Thread(
|
303 |
+
target=update_worker,
|
304 |
+
args=(args.max_num_files, args.update_interval, args.elo_results_file),
|
305 |
+
)
|
306 |
+
update_thread.start()
|
307 |
+
|
308 |
+
demo = build_demo(args.elo_results_file, args.leaderboard_table_file)
|
309 |
+
demo.queue(
|
310 |
+
concurrency_count=args.concurrency_count, status_update_rate=10, api_open=False
|
311 |
+
).launch(
|
312 |
+
server_name=args.host, server_port=args.port, share=args.share, max_threads=200
|
313 |
+
)
|
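The pickle written by elo_analysis.py is the artifact monitor.py loads for its leaderboard tab; a minimal offline sketch of consuming it (the file name is illustrative and depends on the cutoff date):

```python
# Minimal sketch: load a pickled Elo result produced by elo_analysis.py and
# render the same leaderboard markdown that monitor.py serves.
import pickle
from fastchat.serve.monitor.monitor import make_leaderboard_md

with open("elo_results_20231101.pkl", "rb") as fin:   # hypothetical date suffix
    elo_results = pickle.load(fin)

print(make_leaderboard_md(elo_results))
print("last updated:", elo_results["last_updated_datetime"])
```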