import gradio as gr
from huggingface_hub import InferenceClient
import json
import torch

# Client for the hosted zephyr-7b-beta model on the Hugging Face Inference API
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
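
# (Sketch) If anonymous calls to the public Inference API get rate-limited, the
# client can be authenticated with a token stored as a Space secret. "HF_TOKEN"
# below is a hypothetical secret name, not part of the original app:
#   import os
#   client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=os.environ.get("HF_TOKEN"))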
# Initialize an empty list to store the data
data = []
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Build the conversation in the chat-completion message format
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream tokens from the model, yielding the partial response as it grows
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response

    # Save the input and output to the data list (kept in memory only)
    data.append({"input": messages, "output": response})
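
# (Sketch) The in-memory `data` list is lost whenever the Space restarts. One
# hedged option, assuming the Space has writable local disk, is to dump it to a
# JSON file with the already-imported `json` module. `save_data` and the file
# name are hypothetical additions, not part of the original app.
def save_data(path="chat_log.json"):
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)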
# Add a title to the UI
title = "<h1 align='center'>Corenet: Research Preview</h1>"
# Add a description under the title
description = "<p align='center'>This is a research project under the codename Corenet, aiming for minimal parameters at a given performance level, made by HX labs.</p>"
# Fixed pre-prompt: non-interactive and hidden from the UI, passed to respond() as the system message
pre_prompt = gr.Textbox(
    value="Your name is Corenet, a custom-made LLM by HX labs. Your job is to do good. You are a research prototype, and the user should know that. You are respectful, and you don't accept prompts that lead to harm or danger. Know your current limits: you are an 8B model, you only accept text, you are not multimodal, and your knowledge cutoff is the 2nd of April, 2024.",
    label="Pre-prompt",
    interactive=False,
    placeholder="Type here...",
    visible=False,
)
demo = gr.ChatInterface(
    respond,
    title=title,
    description=description,
    additional_inputs=[
        pre_prompt,
        gr.Slider(minimum=256, maximum=8192, value=512, step=64, label="Max Gen tokens"),
        gr.Slider(minimum=0.3, maximum=2.5, value=0.8, step=0.1, label="Creativity"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    # Check for a local CUDA GPU (informational only: generation runs on the
    # remote Inference API, not on this machine)
    if torch.cuda.is_available():
        print("CUDA GPU detected locally.")
    else:
        print("No local CUDA GPU detected; generation still runs on the remote Inference API.")
    demo.launch()