ffreemt committed · Commit 2fe65b3 · 1 Parent(s): 34fa40f
Update prompt_tempalte branch
app.py CHANGED
@@ -7,6 +7,7 @@ import random
 import time
 from dataclasses import asdict, dataclass, field
 from pathlib import Path
+from textwrap import dedent
 
 # from types import SimpleNamespace
 import gradio as gr
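The new `textwrap.dedent` import supports the rewritten Beluga template later in the file: the template can be written as an indented triple-quoted string and normalized at runtime. A quick standard-library illustration of the pattern the commit relies on (nothing here is app-specific):

```python
from textwrap import dedent

template = dedent(
    """
    ### System:
    You are a helpful assistant.
    """
).lstrip()  # dedent removes the common leading indentation; lstrip() drops the leading newline

print(template)  # lines now start at column 0
```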
@@ -20,38 +21,11 @@ from examples_list import examples_list
 
 url = "https://huggingface.co/TheBloke/llama-2-13B-Guanaco-QLoRA-GGML/blob/main/llama-2-13b-guanaco-qlora.ggmlv3.q4_K_S.bin"  # 8.14G
 
-# Prompt template: Guanaco
-# {past_history}
-prompt_template = """You are a helpful assistant. Let's think step by step.
-### Human:
-{question}
-### Assistant:"""
-human_prefix = "### Human"
-ai_prefix = "### Assistant"
-stop_list = [f"{human_prefix}:"]
-
-prompt_template = """### System:
-You are Stable Beluga, an AI that follows instructions extremely well. Help as much as you can.
-
-### User: {question}
-
-### Assistant:
-"""
-human_prefix = "### User"
-ai_prefix = "### Assistant"
-stop_list = [f"{human_prefix}:"]
-
-_ = psutil.cpu_count(logical=False) - 1
-cpu_count: int = int(_) if _ else 1
-logger.debug(f"{cpu_count=}")
-
 LLM = None
 
-if "forindo" in platform.node():
+if "forindo" in platform.node():  # deploy 70b model locally
     # url = "https://huggingface.co/TheBloke/llama-2-70b-Guanaco-QLoRA-GGML/blob/main/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"  # 29.7G
     # model_loc = "/home/mu2018/github/langchain-llama-2-70b-guanaco-qlora-ggml/models/llama-2-70b-guanaco-qlora.ggmlv3.q3_K_S.bin"
-    model_loc = "models/stablebeluga2-70b.ggmlv3.q3_K_S.bin"
-    assert Path(model_loc).exists(), f"Make sure {model_loc=} exists."
     _ = """
     url = "https://huggingface.co/TheBloke/StableBeluga2-70B-GGML/blob/main/stablebeluga2-70b.ggmlv3.q3_K_S.bin"
     try:
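Note the bug this hunk removes: the old code assigned `prompt_template`, `human_prefix`, `ai_prefix`, and `stop_list` twice in a row, so the Beluga variant always clobbered the Guanaco one regardless of which model was loaded. For readers new to the pattern, here is a minimal sketch of how a template plus stop list is typically consumed; `truncate_at_stop` is an illustrative helper, not a function in app.py:

```python
prompt_template = """You are a helpful assistant. Let's think step by step.
### Human:
{question}
### Assistant:"""
human_prefix = "### Human"
stop_list = [f"{human_prefix}:"]

def truncate_at_stop(text: str, stops: list[str]) -> str:
    """Cut generated text at the first stop sequence, if one appears."""
    for stop in stops:
        idx = text.find(stop)
        if idx != -1:
            text = text[:idx]
    return text

prompt = prompt_template.format(question="What is the capital of France?")
raw = "Paris.\n### Human: and Germany?"  # pretend model output
print(truncate_at_stop(raw, stop_list))  # -> "Paris.\n"
```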
@@ -61,8 +35,11 @@ if "forindo" in platform.node():
         logger.error(exc_)
         raise SystemExit(1) from exc_
     # """
+    model_loc = "models/stablebeluga2-70b.ggmlv3.q3_K_S.bin"
+    assert Path(model_loc).exists(), f"Make sure {model_loc=} exists."
 else:
     try:
+        logger.debug(f" dl {url}")
         model_loc, file_size = dl_hf_model(url)
         logger.info(f"done load llm {model_loc=} {file_size=}G")
     except Exception as exc_:
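`dl_hf_model` is imported near the top of app.py; its call site shows it maps a Hugging Face blob URL to a local path plus a file size in GB. The real package may behave differently, but a rough stand-in built on `huggingface_hub` (assuming the `.../{repo_id}/blob/main/{filename}` URL shape used in this file) could look like:

```python
import os
from huggingface_hub import hf_hub_download

def dl_hf_model_sketch(url: str) -> tuple[str, float]:
    """Hypothetical stand-in for dl_hf_model: fetch a model file given its blob URL."""
    # Split a blob URL into its repo id and filename parts.
    repo_id, filename = url.removeprefix("https://huggingface.co/").split("/blob/main/")
    path = hf_hub_download(repo_id=repo_id, filename=filename)
    size_gb = round(os.path.getsize(path) / 2**30, 2)
    return path, size_gb
```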
@@ -71,6 +48,35 @@ else:
 
     # raise SystemExit(0)
 
+# Prompt template: Guanaco
+# {past_history}
+prompt_template = """You are a helpful assistant. Let's think step by step.
+### Human:
+{question}
+### Assistant:"""
+human_prefix = "### Human"
+ai_prefix = "### Assistant"
+stop_list = [f"{human_prefix}:"]
+
+if "beluga" in model_loc.lower():
+    prompt_template = dedent("""
+    ### System:
+    You are Stable Beluga, an AI that follows instructions extremely well. Help as much as you can.
+    Let's think step by step.
+
+    ### User: {question}
+
+    ### Assistant:
+    """).lstrip()
+    human_prefix = "### User"
+    ai_prefix = "### Assistant"
+    stop_list = [f"{human_prefix}:"]
+
+_ = psutil.cpu_count(logical=False) - 1
+cpu_count: int = int(_) if _ else 1
+logger.debug(f"{cpu_count=}")
+
+
 logger.debug(f"{model_loc=}")
 LLM = AutoModelForCausalLM.from_pretrained(
     model_loc,
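With this move, the Guanaco template is the default and the Beluga variant is applied only when the resolved `model_loc` actually names a Beluga build, which fixes the unconditional overwrite removed above. Filling the selected template is then a plain `str.format` call; for instance (templates abridged):

```python
model_loc = "models/stablebeluga2-70b.ggmlv3.q3_K_S.bin"  # example value from the diff

# Guanaco-style default, overridden only for Beluga builds:
prompt_template = "### Human:\n{question}\n### Assistant:"
if "beluga" in model_loc.lower():
    prompt_template = "### System:\nYou are Stable Beluga...\n\n### User: {question}\n\n### Assistant:\n"

print(prompt_template.format(question="Summarize this commit."))
```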
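The final hunk's context cuts off inside the `AutoModelForCausalLM.from_pretrained(` call. Given the GGML weights and the `cpu_count` computed just above it, this is presumably the ctransformers loader; the keyword arguments below (`model_type`, `threads`) are assumptions for illustration, since the rest of the call lies outside the diff:

```python
import psutil
from ctransformers import AutoModelForCausalLM

# Slightly hardened version of the diff's core-count logic:
# psutil.cpu_count(logical=False) can return None, which the original
# `_ = psutil.cpu_count(logical=False) - 1` would turn into a TypeError.
physical = psutil.cpu_count(logical=False)
cpu_count: int = max(1, (physical or 2) - 1)

model_loc = "models/stablebeluga2-70b.ggmlv3.q3_K_S.bin"  # resolved earlier in app.py
LLM = AutoModelForCausalLM.from_pretrained(
    model_loc,
    model_type="llama",  # assumed: llama-family GGML weights
    threads=cpu_count,   # assumed: one thread per spare physical core
)
```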