"""Gradio chat demo for manually testing inference with the sl0th model.

Loads ``tevykuch/sl0th`` through ctransformers' Hugging Face compatible
wrapper and exposes it in a ``gr.ChatInterface`` with a few example prompts.
"""

from ctransformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

# Disclaimer rendered (as Markdown) underneath the chat widget.
sys_message = """
This model can generate untruths, lies or inappropriate things. Only for testing and validation.
"""

# Configuration settings for model generation (example).
# "stop_token" is not a generate() argument: stream() keeps it out of the
# generate() call and uses it to cut the decoded reply instead.
# NOTE(review): temperature/top_p/top_k normally only take effect when sampling
# is enabled (do_sample=True) -- confirm whether greedy decoding is intended.
generation_config = {
    "max_new_tokens": 2048,
    "temperature": 0.50,
    "top_p": 0.95,
    "top_k": 30,
    "repetition_penalty": 1.1,
    "stop_token": '### Instruction:'
}

# Module-level load: the model and tokenizer are created once and shared by
# every chat request.
llm = AutoModelForCausalLM.from_pretrained("tevykuch/sl0th", hf=True)
tokenizer = AutoTokenizer.from_pretrained(llm)


def stream(prompt, history=None):
    """Generate a single reply for ``prompt``.

    Args:
        prompt: The user's message text.
        history: Previous chat turns as passed by ``gr.ChatInterface``.
            Accepted so the function matches the ``fn(message, history)``
            callback signature; it is currently not used to build the prompt.

    Returns:
        The decoded model reply as a string, cut at the configured stop
        sequence if that sequence occurs in the output.
    """
    # Tokenize the prompt
    inputs = tokenizer.encode(prompt, return_tensors="pt")

    # Only forward real generation arguments; the stop sequence is handled
    # after decoding (see below).
    generate_kwargs = {
        key: value
        for key, value in generation_config.items()
        if key != "stop_token"
    }

    # Generate a response
    output_ids = llm.generate(inputs, **generate_kwargs)

    # The generated sequence of a causal LM is expected to start with the
    # prompt tokens; drop them so the reply does not echo the user's message.
    new_token_ids = output_ids[0][inputs.shape[-1]:]

    # Decode the generated ids to a string
    response = tokenizer.decode(new_token_ids, skip_special_tokens=True)

    # Cut the reply where the model starts a new instruction block.
    stop_sequence = generation_config.get("stop_token")
    if stop_sequence:
        response = response.partition(stop_sequence)[0]
    return response


chat_interface = gr.ChatInterface(
    fn=stream,
    stop_btn=None,
    examples=[
        ["តើពណ៌ចម្បងទាំងបីមានអ្វីខ្លះ?"],
        ["តើយើងអាចកាត់បន្ថយការបំពុលបរិយាកាសយ៉ាងដូចម្តេច?"],
        ["រៀបរាប់ពីពេលវេលាដែលអ្នកត្រូវធ្វើការសម្រេចចិត្តលំបាក។"],
        ["កំណត់អត្តសញ្ញាណមួយសេសចេញ។"],
        ["សរសេររឿងខ្លីមួយក្នុងការធ្វើវិសោធនកម្មរបស់បុគ្គលទីបីអំពីតួឯកដែលត្រូវធ្វើការសម្រេចចិត្តអាជីពដ៏សំខាន់មួយ។"],
        ["វាយតម្លៃប្រយោគនេះសម្រាប់កំហុសអក្ខរាវិរុទ្ធនិងវេយ្យាករណ៍"]
    ],
)

with gr.Blocks() as demo:
    # NOTE(review): these strings previously spanned several physical lines
    # inside ordinary quotes (a syntax error); any HTML tags/links they once
    # carried are not present in the file -- restore them if they are known.
    gr.HTML("sl0th inference tester only (not final)")
    gr.HTML("Model: Sl0th Mistral 7b 0.2 | ")
    gr.HTML("Finetune here Mistral 7b thanks dataset maker (my coworker) Alpaca-data-pt-br.")
    chat_interface.render()
    gr.Markdown(sys_message)
    gr.DuplicateButton(value="Duplicate the Magic", elem_id="duplicate-button")

if __name__ == "__main__":
    demo.queue(max_size=10).launch()