Llama-3.1-8B-Instruct

Runtime error

App Files Community

vilarin committed on Jul 23

Commit

0486bff

•

1 Parent(s): 7cb9567

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -26

app.py CHANGED Viewed

@@ -6,16 +6,15 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
 import gradio as gr
 from threading import Thread
-MODEL_LIST = ["meta-llama/Meta-Llama-3.1-8B-Instruct", "meta-llama/Meta-Llama-3.1-70B-Instruct"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL = os.environ.get("MODEL_ID")
-TITLE = "<h1><center>Meta-Llama3.1-Chat</center></h1>"
 PLACEHOLDER = """
 <center>
-<p>😊Hi! How can I help you today?</p><br>
-<p>✨Select Meta-Llama3.1-8B/70B in Advanced Options</p>
 </center>
 """
@@ -40,20 +39,14 @@ quantization_config = BitsAndBytesConfig(
     bnb_4bit_use_double_quant=True,
     bnb_4bit_quant_type= "nf4")
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
-model_8b = AutoModelForCausalLM.from_pretrained(
-    MODEL_LIST[0],
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-    quantization_config=quantization_config)
-model_70b = AutoModelForCausalLM.from_pretrained(
-    MODEL_LIST[1],
     torch_dtype=torch.bfloat16,
     device_map="auto",
     quantization_config=quantization_config)
-@spaces.GPU(duration=120)
 def stream_chat(
     message: str,
     history: list,
@@ -63,7 +56,6 @@ def stream_chat(
     top_p: float = 1.0,
     top_k: int = 20,
     penalty: float = 1.2,
-    choice: str = "Meta-Llama-3.1-8B"
 ):
     print(f'message: {message}')
     print(f'history: {history}')
@@ -79,11 +71,6 @@ def stream_chat(
     conversation.append({"role": "user", "content": message})
-    if choice == "Meta-Llama-3.1-8B":
-        model = model_8b
-    else:
-        model = model_70b
     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
@@ -118,7 +105,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
         fn=stream_chat,
         chatbot=chatbot,
         fill_height=True,
-        additional_inputs_accordion=gr.Accordion(label="⚙️ Advanced Options", open=False, render=False),
         additional_inputs=[
             gr.Textbox(
                 value="You are a helpful assistant",
@@ -165,12 +152,6 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
                 label="Repetition penalty",
                 render=False,
             ),
-            gr.Radio(
-                ["Meta-Llama-3.1-8B", "Meta-Llama-3.1-70B"],
-                value="Meta-Llama-3.1-8B",
-                label="Load Model",
-                render=False,
-            ),
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],

 import gradio as gr
 from threading import Thread
+MODEL_LIST = ["meta-llama/Meta-Llama-3.1-8B-Instruct"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL = os.environ.get("MODEL_ID")
+TITLE = "<h1><center>Meta-Llama3.1-8B</center></h1>"
 PLACEHOLDER = """
 <center>
+<p>Hi! How can I help you today?</p>
 </center>
 """
     bnb_4bit_use_double_quant=True,
     bnb_4bit_quant_type= "nf4")
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL,
     torch_dtype=torch.bfloat16,
     device_map="auto",
     quantization_config=quantization_config)
+@spaces.GPU()
 def stream_chat(
     message: str,
     history: list,
     top_p: float = 1.0,
     top_k: int = 20,
     penalty: float = 1.2,
 ):
     print(f'message: {message}')
     print(f'history: {history}')
     conversation.append({"role": "user", "content": message})
     input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
         fn=stream_chat,
         chatbot=chatbot,
         fill_height=True,
+        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
         additional_inputs=[
             gr.Textbox(
                 value="You are a helpful assistant",
                 label="Repetition penalty",
                 render=False,
             ),
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],