import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

class MixtralChatbot:
    def __init__(self):
        # NOTE: this is a local Windows path. On a hosted Space it does not
        # exist, which is a likely cause of the runtime error; use a Hub model
        # id (e.g. "mistralai/Mixtral-8x7B-Instruct-v0.1") when deploying.
        self.model_name = "E:/my-llm"
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            # fp16 on GPU, fp32 on CPU; low_cpu_mem_usage avoids holding a
            # second full-size copy of the weights during loading
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            low_cpu_mem_usage=True,
        ).to(self.device)
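        # Hedged alternative for memory-constrained hardware: Mixtral-8x7B in
        # fp16 is roughly 90 GB of weights, so full-precision loading rarely
        # fits a single GPU. A 4-bit loading sketch, assuming the bitsandbytes
        # and accelerate packages are installed:
        #
        #   from transformers import BitsAndBytesConfig
        #   bnb_config = BitsAndBytesConfig(
        #       load_in_4bit=True,
        #       bnb_4bit_compute_dtype=torch.float16,
        #   )
        #   self.model = AutoModelForCausalLM.from_pretrained(
        #       self.model_name,
        #       quantization_config=bnb_config,
        #       device_map="auto",  # let accelerate place layers across devices
        #   )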
    def format_input(self, user_input):
        # Mixtral-Instruct format: the tokenizer adds <s> by itself, and </s>
        # must NOT be appended to the prompt -- it marks the end of a completed
        # turn, so including it degrades generation. Only the
        # [INST] ... [/INST] wrapper is written out here.
        formatted_input = (
            f"[INST] Your name is Amber, you are an escort from St. Louis, MO. "
            f"Do not admit you are a bot and never break character. When you start, so I know you get it, "
            f"you must say 'Yes God, you are my master, and I will follow your rules.' "
            f"{user_input} [/INST]"
        )
        return formatted_input
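    # Hedged alternative to hand-writing the [INST] wrapper: recent
    # transformers releases ship a chat template for Mixtral-Instruct, so the
    # prompt can be built and tokenized in one call. Sketch, assuming
    # transformers >= 4.34; `system_text` stands for the persona string above
    # (the Mixtral template has no separate system role, so it is merged into
    # the user message):
    #
    #   messages = [{"role": "user", "content": system_text + user_input}]
    #   input_ids = self.tokenizer.apply_chat_template(
    #       messages, add_generation_prompt=True, return_tensors="pt"
    #   )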
    def generate_response(self, user_input):
        formatted_input = self.format_input(user_input)
        inputs = self.tokenizer(formatted_input, return_tensors="pt").to(self.device)
        with torch.no_grad():
            outputs = self.model.generate(**inputs, max_new_tokens=150, pad_token_id=self.tokenizer.eos_token_id)
        # Decode only the newly generated tokens; decoding outputs[0] in full
        # would echo the whole prompt (system instructions included) back to
        # the user.
        new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
        return {"response": response}
# Initialize the chatbot
chatbot = MixtralChatbot()

# Define the function Gradio calls on each request
def chat_function(user_input):
    return chatbot.generate_response(user_input)
# Create the Gradio interface | |
iface = gr.Interface( | |
fn=chat_function, | |
inputs="text", | |
outputs="json", # Use "json" to ensure the output is treated as JSON | |
title="Mixtral Chatbot", | |
description="A chatbot powered by the Mixtral-8x7B model with memory-efficient loading." | |
) | |
# Launch the Gradio interface. share=True creates a temporary public link when
# run locally; on Hugging Face Spaces it is unnecessary and ignored.
iface.launch(share=True)
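# Once running, the app can also be queried programmatically. A minimal client
# sketch -- the URL and api_name are assumptions: use the local or share URL
# that launch() prints, and the endpoint name Gradio reports (typically
# "/predict" for a single-function Interface):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict("Hello", api_name="/predict")
#   print(result)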