from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline
import torch

import gradio as gr

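# Gradio chat demo for georgesung/llama2_7b_chat_uncensored, loaded in
# 8-bit and served through a transformers text-generation pipeline.
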
def get_response_text(data):
    # The pipeline echoes the full prompt before the completion; keep only
    # the text after the last "### RESPONSE:" marker.
    text = data[0]["generated_text"]

    assistant_text_index = text.rfind('### RESPONSE:')
    if assistant_text_index != -1:
        text = text[assistant_text_index + len('### RESPONSE:'):].strip()

    return text

def get_llm_response(prompt, pipe):
    raw_output = pipe(prompt)
    text = get_response_text(raw_output)
    return text

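# A minimal sanity check of the helper above (hypothetical sample output,
# not part of the app flow; uncomment to verify the marker parsing):
#
#   sample = [{"generated_text": "### HUMAN:\nHi\n\n### RESPONSE:\nHello!"}]
#   assert get_response_text(sample) == "Hello!"
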
model_id = "georgesung/llama2_7b_chat_uncensored"
tokenizer = LlamaTokenizer.from_pretrained(model_id)
# 8-bit loading requires the bitsandbytes and accelerate packages.
model = LlamaForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True)

# Llama tokenizers ship without a pad token. Reusing the eos token avoids
# resizing the quantized embedding matrix for a brand-new '[PAD]' token.
tokenizer.pad_token = tokenizer.eos_token

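# On newer transformers releases the same 8-bit load is expressed through
# BitsAndBytesConfig instead of the load_in_8bit flag (a sketch, assuming
# transformers >= 4.30 with bitsandbytes installed):
#
#   from transformers import BitsAndBytesConfig
#   model = LlamaForCausalLM.from_pretrained(
#       model_id,
#       device_map="auto",
#       quantization_config=BitsAndBytesConfig(load_in_8bit=True),
#   )
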
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=4096,
    do_sample=True,  # sampling must be enabled for temperature/top_p to take effect
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
)

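# Optional startup smoke test (hypothetical one-turn prompt; uncomment to
# confirm the model loads and responds before serving the UI):
#
#   print(get_llm_response("### HUMAN:\nSay hi.\n\n### RESPONSE:\n", pipe))
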
with gr.Blocks() as demo:
    gr.Markdown("""
# Chat with llama2_7b_chat_uncensored
NOTICE: I will pause this space on Monday, July 24, around noon UTC, since it costs money to run :)

If you wish to run this space yourself, you can duplicate this space and run it on a T4 small instance.
""")
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def hist_to_prompt(history):
        # Serialize the chat history into the "### HUMAN:/### RESPONSE:"
        # format the model was fine-tuned on; the trailing "### RESPONSE:"
        # cues the model to produce the next assistant turn.
        prompt = ""
        for human_text, bot_text in history:
            prompt += f"### HUMAN:\n{human_text}\n\n### RESPONSE:\n"
            if bot_text:
                prompt += f"{bot_text}\n\n"
        return prompt

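    # For example, a one-turn history [["Hi", None]] serializes to:
    #
    #   ### HUMAN:
    #   Hi
    #
    #   ### RESPONSE:
    #
    # leaving the response slot open for the model to fill.
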
    def get_bot_response(text):
        # Same marker-based extraction as get_response_text, but on a plain
        # string. Currently unused by the event handlers below.
        bot_text_index = text.rfind('### RESPONSE:')
        if bot_text_index != -1:
            text = text[bot_text_index + len('### RESPONSE:'):].strip()
        return text

    def user(user_message, history):
        # Clear the textbox and append the new message to the history with
        # an empty bot slot for bot() to fill in.
        return "", history + [[user_message, None]]

    def bot(history):
        hist_text = hist_to_prompt(history)
        print(hist_text)  # debug: log the exact prompt sent to the model
        # The eos token marks the end of the turn when the history is
        # serialized back into a prompt (note it will also appear verbatim
        # in the chat UI).
        bot_message = get_llm_response(hist_text, pipe) + tokenizer.eos_token
        history[-1][1] = bot_message
        return history

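    # Wiring: submitting the textbox runs user() immediately (queue=False)
    # to echo the message, then bot() on the queue to generate the reply.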
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()  # enable the request queue used by the bot() event
demo.launch()