# -*- coding: utf-8 -*-
"""gradio.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1goHcmXF0Gc4_X9PN-zecV77j9KeI6Dmn
"""
# !pip install -q -U gradio
# !pip install -q -U torch transformers accelerate einops
# !pip install -q peft
import gradio as gr
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
)
from peft import PeftModel, PeftConfig
# Download and load the tokenizer for the Mixtral-8x7B-Instruct base model
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mixtral-8x7B-Instruct-v0.1", trust_remote_code=True
)

# Download the SciQ fine-tuned adapter weights and apply them to the base model
config = PeftConfig.from_pretrained("mudogruer/mixtral-7x8b-SciQ")
base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
model = PeftModel.from_pretrained(base_model, "mudogruer/mixtral-7x8b-SciQ")
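# Optionally, the LoRA adapter could be merged into the base weights for faster
# inference (merge_and_unload() is part of the PEFT API; this step is an
# assumption, not in the original script):
# model = model.merge_and_unload()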
# Text-generation pipeline
mixtral = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    device_map="cpu",
)
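# Note: an 8x7B mixture-of-experts model is very slow on CPU; with sufficient
# GPU memory, device_map="auto" could be used instead (assumption, not in the
# original script).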
examples = [
    ["Which organelle carries out the synthesis and packaging of digestive enzymes?"],
    ["What is the change in speed of a moving object per unit time?"],
    ["What is the formula of carbon tetrafluoride?"],
]
def generate(message, max_new_tokens):
    instruction = (
        "You are a helpful assistant to 'User'. You do not respond as 'User' "
        "or pretend to be 'User'. You only respond once as 'Assistant'."
    )
    final_prompt = f"Instruction: {instruction}\nUser: {message}\nOutput:"

    # Generate text synchronously
    response = mixtral(final_prompt, max_new_tokens=max_new_tokens)
    generated_text = response[0]["generated_text"]

    # The pipeline echoes the prompt, so keep only the text after the final
    # 'Output:' marker, i.e. the assistant's response
    last_response = generated_text.split("Output:")[-1].strip()
    return last_response
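# Quick sanity check when running locally (hypothetical usage, not part of the
# original app):
# print(generate("What is the formula of water?", max_new_tokens=32))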
# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown("""### Mixtral-8x7B Scientific Question Chatbot (fine-tuned on the SciQ dataset)""")
    tokens_slider = gr.Slider(8, 128, value=21, label="Maximum new tokens")
    chatbot = gr.Interface(fn=generate, inputs=["text", tokens_slider], outputs="text", examples=examples)

demo.launch(share=True)
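# share=True creates a temporary public link when running locally; it is not
# needed when the app is hosted on Hugging Face Spaces.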