Spaces:

hayas
/

CALM2-7B-chat

Running on Zero

App Files Files Community

hayas committed on Nov 3, 2023

Commit

f0dff07

•

1 Parent(s): 9cc5e5a

Add files

Browse files

Files changed (6) hide show

.pre-commit-config.yaml +55 -0
.vscode/settings.json +21 -0
README.md +5 -4
app.py +137 -0
requirements.txt +9 -0
style.css +16 -0

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,55 @@

+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-executables-have-shebangs
+      - id: check-json
+      - id: check-merge-conflict
+      - id: check-shebang-scripts-are-executable
+      - id: check-toml
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+      - id: requirements-txt-fixer
+      - id: trailing-whitespace
+  - repo: https://github.com/myint/docformatter
+    rev: v1.7.5
+    hooks:
+      - id: docformatter
+        args: ["--in-place"]
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: ["--profile", "black"]
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.6.1
+    hooks:
+      - id: mypy
+        args: ["--ignore-missing-imports"]
+        additional_dependencies:
+          ["types-python-slugify", "types-requests", "types-PyYAML"]
+  - repo: https://github.com/psf/black
+    rev: 23.10.1
+    hooks:
+      - id: black
+        language_version: python3.10
+        args: ["--line-length", "119"]
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.6.1
+    hooks:
+      - id: nbstripout
+        args:
+          [
+            "--extra-keys",
+            "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
+          ]
+  - repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.7.0
+    hooks:
+      - id: nbqa-black
+      - id: nbqa-pyupgrade
+        args: ["--py37-plus"]
+      - id: nbqa-isort
+        args: ["--float-to-top"]

.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.formatOnType": true,
+        "editor.codeActionsOnSave": {
+            "source.organizeImports": true
+        }
+    },
+    "black-formatter.args": [
+        "--line-length=119"
+    ],
+    "isort.args": ["--profile", "black"],
+    "flake8.args": [
+        "--max-line-length=119"
+    ],
+    "ruff.args": [
+        "--line-length=119"
+    ],
+    "editor.formatOnSave": true,
+    "files.insertFinalNewline": true
+}

README.md CHANGED Viewed

@@ -1,13 +1,14 @@
 ---
-title: CALM2 7B Chat
-emoji: 📉
-colorFrom: gray
-colorTo: green
 sdk: gradio
 sdk_version: 4.0.2
 app_file: app.py
 pinned: false
 license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: CALM2-7B-chat
+emoji: ⚡
+colorFrom: red
+colorTo: purple
 sdk: gradio
 sdk_version: 4.0.2
 app_file: app.py
 pinned: false
 license: mit
+suggested-hardware: t4-small
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,137 @@

+#!/usr/bin/env python
+import os
+from threading import Thread
+from typing import Iterator
+import gradio as gr
+import spaces
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+# Markdown shown at the top of the page; a notice is appended below when CUDA is unavailable.
+DESCRIPTION = "# CALM2-7B-chat"
+if not torch.cuda.is_available():
+    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
+# Upper bound and initial value of the "Max new tokens" slider.
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 1024
+# Prompts longer than this many tokens are trimmed in generate(); overridable through the environment.
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "32768"))
+# The model and tokenizer are loaded only when CUDA is available, so `model` and `tokenizer`
+# stay undefined on CPU-only hosts.
+if torch.cuda.is_available():
+    model_id = "cyberagent/calm2-7b-chat"
+    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+def apply_chat_template(conversation: list[dict[str, str]]) -> str:
+    prompt = "\n".join([f"{c['role']}: {c['content']}" for c in conversation])
+    prompt = f"{prompt}\nASSISTANT: "
+    return prompt
+@spaces.GPU
+@torch.inference_mode()
+def generate(
+    message: str,
+    chat_history: list[tuple[str, str]],
+    max_new_tokens: int = 1024,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    top_k: int = 50,
+    repetition_penalty: float = 1.0,
+) -> Iterator[str]:
+    """Stream a sampled reply to ``message`` given the earlier chat turns.
+
+    Builds a prompt from ``chat_history`` plus ``message``, tokenizes it, starts
+    ``model.generate`` on a background thread and yields the reply text accumulated so far
+    each time the streamer produces a new piece.
+
+    Args:
+        message: The new user message.
+        chat_history: Earlier ``(user, assistant)`` message pairs, oldest first.
+        max_new_tokens: Forwarded to ``model.generate``.
+        temperature: Forwarded to ``model.generate``.
+        top_p: Forwarded to ``model.generate``.
+        top_k: Forwarded to ``model.generate``.
+        repetition_penalty: Forwarded to ``model.generate``.
+
+    Yields:
+        The concatenation of all text pieces received from the streamer so far.
+    """
+    # Flatten the (user, assistant) pairs into role-tagged turns, then add the new message.
+    conversation = []
+    for user, assistant in chat_history:
+        conversation.extend([{"role": "USER", "content": user}, {"role": "ASSISTANT", "content": assistant}])
+    conversation.append({"role": "USER", "content": message})
+    prompt = apply_chat_template(conversation)
+    input_ids = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
+    # Over-long prompts keep only their last MAX_INPUT_TOKEN_LENGTH tokens, i.e. the oldest
+    # part of the conversation is dropped.
+    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+    input_ids = input_ids.to(model.device)
+    # NOTE(review): timeout=10.0 presumably bounds the wait for each streamed piece - confirm
+    # against the TextIteratorStreamer documentation.
+    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    generate_kwargs = dict(
+        {"input_ids": input_ids},
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+        num_beams=1,
+        repetition_penalty=repetition_penalty,
+    )
+    # Generation runs on its own thread so this generator can consume the streamer while it
+    # runs. The thread is started but never joined here.
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+    outputs = []
+    for text in streamer:
+        outputs.append(text)
+        # Yield the whole reply so far rather than just the newest piece.
+        yield "".join(outputs)
+# Chat UI wired to generate(). The five sliders are listed in the same order as generate()'s
+# parameters after message and chat_history, with matching default values.
+chat_interface = gr.ChatInterface(
+    fn=generate,
+    chatbot=gr.Chatbot(show_label=False, layout="panel", height=600),
+    additional_inputs_accordion_name="詳細設定",  # Japanese for "Advanced settings"
+    additional_inputs=[
+        gr.Slider(
+            label="Max new tokens",
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=4.0,
+            step=0.1,
+            value=0.7,
+        ),
+        gr.Slider(
+            label="Top-p (nucleus sampling)",
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=0.95,
+        ),
+        gr.Slider(
+            label="Top-k",
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=50,
+        ),
+        gr.Slider(
+            label="Repetition penalty",
+            minimum=1.0,
+            maximum=2.0,
+            step=0.05,
+            value=1.0,
+        ),
+    ],
+    # NOTE(review): stop_btn=None presumably removes the stop button - confirm for gradio 4.0.2.
+    stop_btn=None,
+    # Japanese example prompts offered in the UI.
+    examples=[
+        ["東京の観光名所を教えて。"],
+        ["落武者って何？"],
+        ["暴れん坊将軍って誰のこと？"],
+        ["人がヘリを食べるのにかかる時間は？"],
+    ],
+)
+# Page layout: the description, an optional duplicate button, then the chat interface.
+with gr.Blocks(css="style.css") as demo:
+    gr.Markdown(DESCRIPTION)
+    gr.DuplicateButton(
+        value="Duplicate Space for private use",
+        elem_id="duplicate-button",
+        # Visible only when the SHOW_DUPLICATE_BUTTON environment variable is exactly "1".
+        visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
+    )
+    chat_interface.render()
+if __name__ == "__main__":
+    # NOTE(review): max_size=20 presumably caps the number of queued requests - confirm.
+    demo.queue(max_size=20).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+accelerate==0.24.1
+bitsandbytes==0.41.1
+gradio==4.0.2
+protobuf==3.20.3
+scipy==1.11.3
+sentencepiece==0.1.99
+spaces==0.18.0
+torch==2.0.0
+transformers==4.35.0

style.css ADDED Viewed

	@@ -0,0 +1,16 @@

+h1 {
+  text-align: center;
+}
+#duplicate-button {
+  margin: auto;
+  color: white;
+  background: #1565c0;
+  border-radius: 100vh;
+}
+.contain {
+  max-width: 900px;
+  margin: auto;
+  padding-top: 1.5rem;
+}