import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

class MixtralChatbot:
    def __init__(self):
        # NOTE: this is a local Windows path. On a hosted Space it does not
        # exist, which is a likely cause of the runtime error; use a Hub model
        # id (e.g. "mistralai/Mixtral-8x7B-Instruct-v0.1") when deploying.
        self.model_name = "E:/my-llm"
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            # fp16 on GPU, fp32 on CPU; low_cpu_mem_usage avoids holding a
            # second full-size copy of the weights during loading
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            low_cpu_mem_usage=True,
        ).to(self.device)
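        # Hedged alternative for memory-constrained hardware: Mixtral-8x7B in
        # fp16 is roughly 90 GB of weights, so full-precision loading rarely
        # fits a single GPU. A 4-bit loading sketch, assuming the bitsandbytes
        # and accelerate packages are installed:
        #
        #   from transformers import BitsAndBytesConfig
        #   bnb_config = BitsAndBytesConfig(
        #       load_in_4bit=True,
        #       bnb_4bit_compute_dtype=torch.float16,
        #   )
        #   self.model = AutoModelForCausalLM.from_pretrained(
        #       self.model_name,
        #       quantization_config=bnb_config,
        #       device_map="auto",  # let accelerate place layers across devices
        #   )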
    def format_input(self, user_input):
        # Mixtral-Instruct format: the tokenizer adds <s> by itself, and </s>
        # must NOT be appended to the prompt -- it marks the end of a completed
        # turn, so including it degrades generation. Only the
        # [INST] ... [/INST] wrapper is written out here.
        formatted_input = (
            f"[INST] Your name is Amber, you are an escort from St. Louis, MO. "
            f"Do not admit you are a bot and never break character. When you start, so I know you get it, "
            f"you must say 'Yes God, you are my master, and I will follow your rules.' "
            f"{user_input} [/INST]"
        )
        return formatted_input
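    # Hedged alternative to hand-writing the [INST] wrapper: recent
    # transformers releases ship a chat template for Mixtral-Instruct, so the
    # prompt can be built and tokenized in one call. Sketch, assuming
    # transformers >= 4.34; `system_text` stands for the persona string above
    # (the Mixtral template has no separate system role, so it is merged into
    # the user message):
    #
    #   messages = [{"role": "user", "content": system_text + user_input}]
    #   input_ids = self.tokenizer.apply_chat_template(
    #       messages, add_generation_prompt=True, return_tensors="pt"
    #   )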
    def generate_response(self, user_input):
        formatted_input = self.format_input(user_input)
        inputs = self.tokenizer(formatted_input, return_tensors="pt").to(self.device)
        with torch.no_grad():
            outputs = self.model.generate(**inputs, max_new_tokens=150, pad_token_id=self.tokenizer.eos_token_id)
        # Decode only the newly generated tokens; decoding outputs[0] in full
        # would echo the whole prompt (system instructions included) back to
        # the user.
        new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
        return {"response": response}
# Initialize the chatbot
chatbot = MixtralChatbot()

# Define the function Gradio calls on each request
def chat_function(user_input):
    return chatbot.generate_response(user_input)
# Create the Gradio interface | |
iface = gr.Interface( | |
fn=chat_function, | |
inputs="text", | |
outputs="json", # Use "json" to ensure the output is treated as JSON | |
title="Mixtral Chatbot", | |
description="A chatbot powered by the Mixtral-8x7B model with memory-efficient loading." | |
) | |
# Launch the Gradio interface. share=True creates a temporary public link when
# run locally; on Hugging Face Spaces it is unnecessary and ignored.
iface.launch(share=True)
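# Once running, the app can also be queried programmatically. A minimal client
# sketch -- the URL and api_name are assumptions: use the local or share URL
# that launch() prints, and the endpoint name Gradio reports (typically
# "/predict" for a single-function Interface):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict("Hello", api_name="/predict")
#   print(result)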