import torch
import tiktoken
import gradio as gr
from model import *
import torch.nn.functional as F  # explicit import: F is used below, don't rely on the star import

enc = tiktoken.get_encoding('gpt2')
# load a full pickled model object (saved with torch.save(model, 'model.pt'));
# model.py must define the model class so unpickling can resolve it
model = torch.load('model.pt', map_location='cpu')
model.eval()  # inference mode: disable dropout during generation

def response(message="Hello, I'm a language model",
             num_return_sequences=5, max_length=30, top_k=50):
    tokens = enc.encode(message)
    tokens = torch.tensor(tokens, dtype=torch.long)  # (T,) — 8 tokens for the default prompt
    # replicate the prompt so all completions are generated in one batch
    tokens = tokens.unsqueeze(0).repeat(num_return_sequences, 1)  # (num_return_sequences, T)
    x = tokens.to('cpu')
    # fixed seeds make generation reproducible across calls
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    while x.size(1) < max_length:
        # forward the model to get the logits
        with torch.no_grad():
            logits = model(x)[0]  # (B, T, vocab_size)
        # take the logits at the last position
        logits = logits[:, -1, :]  # (B, vocab_size)
        # get the probabilities
        probs = F.softmax(logits, dim=-1)
        # top-k sampling (k=50 is the Hugging Face pipeline default);
        # topk_probs and topk_indices are both (B, top_k)
        topk_probs, topk_indices = torch.topk(probs, top_k, dim=-1)
        # sample a token from the top-k probabilities
        # note: multinomial does not require the input to sum to 1
        ix = torch.multinomial(topk_probs, 1)  # (B, 1)
        # map the sampled position back to a vocabulary index
        xcol = torch.gather(topk_indices, -1, ix)  # (B, 1)
        # append to the sequence
        x = torch.cat((x, xcol), dim=1)
    # decode and collect the generated sequences
    return_text = ""
    for i in range(num_return_sequences):
        out_tokens = x[i, :max_length].tolist()
        decoded = enc.decode(out_tokens)
        return_text += ">" + decoded + "\n"
    return return_text

# Create Gradio interface
iface = gr.Interface(
    fn=response,
    inputs=[
        gr.Textbox(lines=5, label="message"),
        gr.Slider(minimum=1, maximum=5, value=1, step=1, label="num_return_sequences"),
        gr.Slider(minimum=10, maximum=150, value=30, step=5, label="max_length"),
        gr.Slider(minimum=1, maximum=100, value=50, step=1, label="top_k"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="GPT Text Generator",
    description="Generate text using a GPT-2 model with adjustable parameters.",
    # each example must supply a value for every input component
    examples=[["Hello, I'm a language model", 1, 30, 50]],
)

# Launch the interface (guarded so importing this module doesn't start the server)
if __name__ == "__main__":
    iface.launch()
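
# --- Usage notes (assumptions beyond the original script) ---
# A minimal way to exercise generation without the web UI, assuming this file
# is saved as app.py (hypothetical name) and model.pt sits alongside it; the
# __main__ guard above keeps the import from launching the server:
#
#   python -c "from app import response; print(response('Hello,', 2, 20, 50))"
#
# iface.launch() serves the UI locally (Gradio defaults to http://127.0.0.1:7860);
# launch(share=True) additionally creates a temporary public link.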