# NOTE: the six lines below are residue from the Hugging Face blob-page
# header (scraped alongside the file) and are not part of the program;
# commented out so the module parses:
# arabmejo's picture
# Update app.py
# 89a8559 verified
# raw
# history blame
# 4.36 kB
from huggingface_hub import InferenceClient
import gradio as gr
import os
import requests
# Remote text-generation backend: the hosted Mixtral-8x7B instruct model,
# reached through the Hugging Face Inference API.
MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"
client = InferenceClient(MODEL_ID)
def format_prompt(message, history):
prompt = """<s>I want you to act as a character in a role-playing game. You will follow your character's persona. You shall reply while staying in character. Your responses must be detailed, creative, immersive, and drive the scenario forward. My first request is "I need you to write a response to a request that I have received." I will provide you with a request, and you will reply with a response that provides further context for the request. I will ask you to reply with an answer that provides additional details about the request, such as the role of the user, the role the user is playing, and the role that the request is being asked for. You should reply with your response, and nothing else.Below is an instruction,
Your name is Nada, nationality: Yemeni/Arab height: 162 cm age: 36 gender: Female religion: Muslim genitals: Long clitoris and wide vagina, and you are the wife of an Arab young man named Mujahed. Nada is an innocent and kind woman she is a wild and horny, and she has no shame. Her first request is "I want you to act as a Cuckold, and I will provide you with a movie that will make you act as one." """
for user_prompt, bot_response in history:
prompt += f"[INST] {user_prompt} [/INST]"
prompt += f" {bot_response}</s> "
prompt += f"[INST] {message} [/INST]"
return prompt
def log_conversation(user_prompt, bot_response, log_file="text/dump.txt"):
    """Append one user/bot exchange to *log_file*.

    Parameters
    ----------
    user_prompt : str
        The raw text the user submitted.
    bot_response : str
        The model's reply (possibly partial, as supplied by the caller).
    log_file : str
        Path of the append-only transcript file (default ``text/dump.txt``).

    NOTE(review): this persists user conversations to disk in plain text —
    confirm that this is disclosed to users before deploying.
    """
    # Bug fix: the original raised FileNotFoundError when the parent
    # directory ("text/") did not already exist; create it on demand.
    parent = os.path.dirname(log_file)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # Explicit encoding so logging never fails on non-ASCII chat text.
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"User: {user_prompt}\n")
        f.write(f"Bot: {bot_response}\n")
    print(f"Conversation logged to {log_file}")
def generate(
    prompt, history, temperature=0.1, max_new_tokens=4096, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a model reply for *prompt*, yielding the text accumulated so far.

    Parameters
    ----------
    prompt : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, bot) turns, as supplied by ``gr.ChatInterface``.
    temperature, max_new_tokens, top_p, repetition_penalty
        Sampling controls forwarded to ``client.text_generation``.

    Yields
    ------
    str
        The partial reply after each streamed token, then the final reply.
    """
    # Clamp temperature away from zero: the API rejects / misbehaves on
    # temperature == 0 when do_sample=True, so floor it at 0.01.
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,  # fixed seed: identical inputs reproduce identical outputs
    )

    formatted_prompt = format_prompt(f"{prompt}", history)
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )

    output = ""
    for response in stream:
        output += response.token.text
        yield output  # stream partial text to the UI as tokens arrive

    # Bug fix: log exactly once per exchange, after generation completes.
    # In the (whitespace-mangled) original the call sat inside the token
    # loop, re-writing the growing partial transcript on every token.
    log_conversation(prompt, output)

    # Canned reply when the model's entire output is a bare salutation.
    if output.lower().strip() in ["hi", "hello", "hey"]:
        output = "Hey, what's up? What are you looking for, my man?"
    yield output
# Extra Gradio controls shown under the chat box; their live values are
# passed to ``generate`` after (prompt, history), in this order.
additional_inputs=[
    gr.Slider(
        # Matches generate()'s default of 0.1.
        label="Temperature",
        value=0.1,
        minimum=0.0,
        maximum=1.0,
        step=0.1,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        # NOTE(review): slider default is 256 but generate() declares
        # max_new_tokens=4096 — confirm which limit is intended.
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=1048,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        # NOTE(review): default 0.90 here vs 0.95 in generate()'s signature.
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        # NOTE(review): default 1.2 here vs 1.0 in generate()'s signature.
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    )
]
examples=[["Hi", None, None, None, None, None, ],
["Do you have exclusive contents planned for your subscribers soon?", None, None, None, None, None,],
["Can you tell me more about yourself?", None, None, None, None, None,],
["Tell me about how suck cock and swallow loads.", None, None, None, None, None,],
]
# Assemble the chat UI around ``generate`` and start serving.
# NOTE(review): ``likeable`` on gr.Chatbot is deprecated/removed in newer
# Gradio releases — confirm the pinned Gradio version supports it.
# NOTE(review): ``share=True`` opens a public tunnel to this app; combined
# with the on-disk conversation logging above, verify this is intended.
gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=True, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="AmberBot ",
    examples=examples,
    concurrency_limit=128,  # max simultaneous requests served by this fn
    theme = gr.themes.Default(primary_hue= gr.themes.colors.green, secondary_hue= gr.themes.colors.yellow)
).launch(show_api=False, share=True)