# sl0th / app.py
# tevykuch: "Update app.py" (commit bf2fe7e, verified)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Disclaimer text shown in the UI. The leading and trailing newlines are part
# of the value and are kept as-is.
sys_message = (
    "\n"
    "This model can generate untruths, lies or inappropriate things. "
    "Only for testing and validation.\n"
)
# Build the text-generation pipeline once at import time so every chat request
# reuses the same loaded model.
# Use the first CUDA device when one is available and fall back to CPU (-1)
# otherwise, rather than failing outright on hosts without a GPU.
pipe = pipeline(
    "text-generation",
    model="tevykuch/sftsl0th",
    device=0 if torch.cuda.is_available() else -1,
    framework="pt",
)
# Generation settings forwarded as keyword arguments to every pipeline call.
generation_config = {
    "max_new_tokens": 2048,
    # transformers only applies temperature / top_p / top_k when sampling is
    # enabled, and sampling is off by default, so turn it on explicitly.
    "do_sample": True,
    "temperature": 0.50,
    "top_p": 0.95,
    "top_k": 30,
    "repetition_penalty": 1.1,
    # Stop generating once the tokenizer's end-of-sequence token is produced.
    "eos_token_id": pipe.tokenizer.eos_token_id,
}
# tokenizer = AutoTokenizer.from_pretrained("tevykuch/sftsl0th")
# llm = AutoModelForCausalLM.from_pretrained("tevykuch/sftsl0th")
# def stream(prompt):
# # Tokenize the prompt
# inputs = tokenizer.encode(prompt, return_tensors="pt")
# # Generate a response
# output_ids = llm.generate(inputs, **generation_config)
# # Decode the generated ids to a string
# response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
# return response
def stream(prompt, history=None):
    """Generate a reply to *prompt* with the module-level pipeline.

    Args:
        prompt: The user's message text.
        history: Earlier chat turns. ``gr.ChatInterface`` passes this as a
            second positional argument, so it must be accepted here. It is
            not used: every reply is generated from ``prompt`` alone.

    Returns:
        The generated continuation as a string, without the prompt echoed
        in front of it.
    """
    # Default first, so an explicit ``return_full_text`` entry in
    # ``generation_config`` would still take precedence.
    options = {"return_full_text": False, **generation_config}
    outputs = pipe(prompt, **options)
    return outputs[0]["generated_text"]
# Chat widget backed by ``stream``; no stop button is shown.
# NOTE(review): the example prompts below look like mis-decoded UTF-8
# (presumably Khmer, given the Khmer dataset linked elsewhere in this file).
# They are reproduced unchanged -- restore them from the original source.
chat_interface = gr.ChatInterface(
    fn=stream,
    stop_btn=None,
    # Each example is a one-element row holding the message text.
    examples=[
        [sample_prompt]
        for sample_prompt in (
            "αžαžΎαž–αžŽαŸŒαž…αž˜αŸ’αž”αž„αž‘αžΆαŸ†αž„αž”αžΈαž˜αžΆαž“αž’αŸ’αžœαžΈαžαŸ’αž›αŸ‡?",
            "αžαžΎαž™αžΎαž„αž’αžΆαž…αž€αžΆαžαŸ‹αž”αž“αŸ’αžαž™αž€αžΆαžšαž”αŸ†αž–αž»αž›αž”αžšαž·αž™αžΆαž€αžΆαžŸαž™αŸ‰αžΆαž„αžŠαžΌαž…αž˜αŸ’αžαŸαž…?",
            "αžšαŸ€αž”αžšαžΆαž”αŸ‹αž–αžΈαž–αŸαž›αžœαŸαž›αžΆαžŠαŸ‚αž›αž’αŸ’αž“αž€αžαŸ’αžšαžΌαžœαž’αŸ’αžœαžΎαž€αžΆαžšαžŸαž˜αŸ’αžšαŸαž…αž…αž·αžαŸ’αžαž›αŸ†αž”αžΆαž€αŸ”",
            "αž€αŸ†αžŽαžαŸ‹αž’αžαŸ’αžαžŸαž‰αŸ’αž‰αžΆαžŽαž˜αž½αž™αžŸαŸαžŸαž…αŸαž‰αŸ”",
            "αžŸαžšαžŸαŸαžšαžšαžΏαž„αžαŸ’αž›αžΈαž˜αž½αž™αž€αŸ’αž“αž»αž„αž€αžΆαžšαž’αŸ’αžœαžΎαžœαž·αžŸαŸ„αž’αž“αž€αž˜αŸ’αž˜αžšαž”αžŸαŸ‹αž”αž»αž‚αŸ’αž‚αž›αž‘αžΈαž”αžΈαž’αŸ†αž–αžΈαžαž½αž―αž€αžŠαŸ‚αž›αžαŸ’αžšαžΌαžœαž’αŸ’αžœαžΎαž€αžΆαžšαžŸαž˜αŸ’αžšαŸαž…αž…αž·αžαŸ’αžαž’αžΆαž‡αžΈαž–αžŠαŸαžŸαŸ†αžαžΆαž“αŸ‹αž˜αž½αž™αŸ”",
            "αžœαžΆαž™αžαž˜αŸ’αž›αŸƒαž”αŸ’αžšαž™αŸ„αž‚αž“αŸαŸ‡αžŸαž˜αŸ’αžšαžΆαž”αŸ‹αž€αŸ†αž αž»αžŸαž’αž€αŸ’αžαžšαžΆαžœαž·αžšαž»αž‘αŸ’αž’αž“αž·αž„αžœαŸαž™αŸ’αž™αžΆαž€αžšαžŽαŸ",
        )
    ],
)
# Page layout: title, model and dataset credits, the chat widget, the
# disclaimer text and a duplicate button, all inside one Blocks container.
with gr.Blocks() as demo:
    # Close the <center>/<h1> pair explicitly so the markup is well-formed.
    gr.HTML("<h1><center> sl0th inference tester only (not final) </center></h1>")
    gr.HTML(
        "<h4 style='text-align: center'>"
        "<a href='https://huggingface.co/tevykuch/sl0th' target='_blank'>Model: Sl0th Mistral 7b 0.2</a> | "
        "</h4>"
    )
    # NOTE(review): the dataset link text reads "Alpaca-data-pt-br" while the
    # href points at a Khmer dataset -- confirm which label is intended.
    gr.HTML(
        "<p><center>Finetune here "
        "<a href='https://huggingface.co/unsloth/mistral-7b-bnb-4bit' target='_blank'>Mistral 7b</a> "
        "thanks dataset maker (my coworker) "
        "<a href='https://huggingface.co/datasets/metythorn/khmerllm-dataset-alpaca-52k-v1'>Alpaca-data-pt-br</a>."
        "</center></p>"
    )
    # The chat interface is created outside this context, so it has to be
    # rendered explicitly to appear in this layout.
    chat_interface.render()
    gr.Markdown(sys_message)
    gr.DuplicateButton(value="Duplicate the Magic", elem_id="duplicate-button")
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    # Enable the request queue (max_size=10), then launch whatever it returns.
    queued_app = demo.queue(max_size=10)
    queued_app.launch()