import gradio as gr
import torch
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Load the tokenizer and place the model on GPU 0.
tokenizer = AutoTokenizer.from_pretrained("tanamettpk/TC-instruct-DPO")
model = AutoModelForCausalLM.from_pretrained(
    "tanamettpk/TC-instruct-DPO",
    low_cpu_mem_usage=True,
    return_dict=True,
    device_map={"": 0},
)

# Sampling settings. Note that with top_k=1, do_sample=True is effectively
# greedy decoding: only the single most probable token can ever be drawn.
generation_config = GenerationConfig(
    do_sample=True,
    top_k=1,
    temperature=0.5,
    max_new_tokens=300,
    repetition_penalty=1.1,
    pad_token_id=tokenizer.eos_token_id,
)

@spaces.GPU(duration=120)
def generate_text(inp, history):
    # `history` is required by ChatInterface's callback signature but is not
    # used here: each turn is answered independently.
    # Wrap the message in the instruction template the model was tuned on.
    prompt = f"\n### Instruction:\n{inp}\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    output_ids = model.generate(**inputs, generation_config=generation_config)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Strip the echoed prompt, plus any <s>/</s> markers that survive
    # decoding, so only the model's reply is returned.
    output = output.replace(prompt, "").replace("<s>", "").replace("</s>", "")
    return output

gr.ChatInterface(generate_text).launch()
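
# A minimal sanity check (an assumed usage sketch, not part of the original
# app): call the handler directly before launching the UI to confirm that
# generation works. The prompt below is only an illustrative placeholder.
#
#   print(generate_text("Explain what DPO fine-tuning is.", history=[]))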