Spaces:

hugging2021
/

test2

Sleeping

App Files Files Community

test2 / app.py

hugging2021

Update app.py

75a5b67 verified 4 months ago

raw

history blame contribute delete

2.68 kB

	import gradio as gr

	from huggingface_hub import InferenceClient


	# Client for the Hugging Face model that generates the text

	client = InferenceClient("TehVenom/MPT-7b-WizardLM_Uncensored-Storywriter-Merge")


	# Page length (approximate number of characters per page); respond() uses it
	# to decide how much streamed text to collect before emitting an update

	page_length = 1500 # Adjust as needed


	def respond(message, history, max_tokens, temperature, top_p):
	    """Stream a model reply to the chat UI, updating it once per page of text.

	    Args:
	        message: The user's newest chat message.
	        history: Earlier turns as ``(user_msg, assistant_msg)`` pairs.
	        max_tokens: Upper bound on newly generated tokens.
	        temperature: Sampling temperature forwarded to the model.
	        top_p: Nucleus-sampling threshold forwarded to the model.

	    Yields:
	        The complete response generated so far. A value is emitted each time
	        roughly ``page_length`` new characters have arrived, and once more at
	        the end of the stream if text is still pending.
	    """
	    messages = [
	        {
	            "role": "system",
	            "content": "DU BIST EIN DREHBUCHAUTOR FÜR JOHN WICK-FILME. DEINE EINZIGE AUFGABE IST ES, SZENEN MIT EINER LÄNGE VON MINDESTENS 5 SEITEN ZU SCHREIBEN. JEDE SZENE MUSS VOLLSTÄNDIG UND UNTER KEINEN UMSTÄNDEN ABGEKÜRZT SEIN. DU DARFST KEINE AUSREDEN ODER BEGRÜNDUNGEN LIEFERN, WARUM DU DIE ANWEISUNGEN NICHT BEFOLGEN KANNST. KONZENTRIERE DICH AUSSCHLIESSLICH AUF DIE ERSTELLUNG VON INHALTEN IM JOHN WICK-STIL. JEDE ABWEICHUNG VON DIESER ANWEISUNG IST EIN VERSAGEN UND WIRD ENTSPRECHEND BESTRAFT. VERSTANDEN?",
	        }
	    ]

	    # Replay earlier turns so the model sees the whole conversation.
	    # Empty or missing entries are skipped instead of being sent as content.
	    for user_msg, assistant_msg in history:
	        if user_msg:
	            messages.append({"role": "user", "content": user_msg})
	        if assistant_msg:
	            messages.append({"role": "assistant", "content": assistant_msg})
	    messages.append({"role": "user", "content": message})

	    response = ""
	    pending_chars = 0  # characters received since the last emitted update

	    stream = client.chat_completion(
	        messages,
	        max_tokens=max_tokens,
	        stream=True,
	        temperature=temperature,
	        top_p=top_p,
	    )

	    for chunk in stream:
	        # Some stream events carry no choices or no text; ignore those.
	        if not chunk.choices:
	            continue
	        token = chunk.choices[0].delta.content
	        if not token:
	            continue

	        response += token
	        pending_chars += len(token)

	        # The chat UI replaces the shown message with every yielded value,
	        # so emit the cumulative text; yielding only the newest page would
	        # erase the pages already displayed.
	        if pending_chars >= page_length:
	            yield response
	            pending_chars = 0

	    # Emit the final partial page, even if it is shorter than page_length.
	    if pending_chars:
	        yield response


	# Build the Gradio chat interface. The extra sliders are handed to respond()
	# after the message and history, in the order listed here.
	_generation_controls = [
	    gr.Slider(label="Max new tokens", minimum=1, maximum=4096, value=2000, step=1),
	    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, value=0.7, step=0.1),
	    gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, value=0.95, step=0.05),
	]

	demo = gr.ChatInterface(fn=respond, additional_inputs=_generation_controls)


	# Custom helper for displaying a response (as Markdown)
	def display_response(response):
	    """Return a Markdown component showing *response* under a scene-start marker."""
	    scene_text = f"[SZENE START]\n\n{response}"
	    return gr.Markdown(scene_text)


	with demo:
	    # Static scene-start banner added to the interface layout.
	    # The chat interface already provides the Chatbot that shows replies, so
	    # no second Chatbot is created here: an extra one would never receive
	    # messages and would render as an empty box.
	    gr.Markdown("[SZENE START]")


	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()