File size: 2,299 Bytes
d208796
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ed0dc9
d208796
8ed0dc9
 
 
 
d208796
 
1a7d665
d208796
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import torch
import tiktoken
from model import *
import gradio as gr

# GPT-2 byte-pair tokenizer; used to encode prompts and decode sampled ids.
enc = tiktoken.get_encoding('gpt2')

# The checkpoint is used directly as a callable (`model(x)`), i.e. it is a
# whole pickled module rather than a state_dict.  PyTorch >= 2.6 defaults to
# weights_only=True, which refuses to unpickle arbitrary classes, so the flag
# is passed explicitly.
# NOTE(security): full unpickling executes arbitrary code from the file --
# only ever load a 'model.pt' that comes from a trusted source.
model = torch.load('model.pt', map_location='cpu', weights_only=False)

# Inference only: switch off train-time behaviour (e.g. dropout) that may have
# been active when the module was saved.
model.eval()


def response(message="Hello, I'm a language model", num_return_sequences=5,
             max_length=30, top_k=50):
    """Sample continuations of ``message`` from the model with top-k sampling.

    Args:
        message: Prompt text; it is tokenized with the module-level ``enc``.
        num_return_sequences: How many independent samples to draw.
        max_length: Total sequence length in tokens, *including* the prompt.
            A prompt that is already this long (or longer) is not extended
            and is cut to ``max_length`` tokens in the output.
        top_k: Number of highest-probability tokens to sample from at each
            step.  Clamped to ``[1, vocab_size]``.

    Returns:
        One line per sample, each formatted as ``">" + text + "\\n"``,
        concatenated into a single string.  An empty string when
        ``num_return_sequences`` is not positive.

    Note:
        The global torch RNG is re-seeded with 42 on every call, so identical
        arguments give identical output.
    """
    # UI widgets (e.g. Gradio sliders) may hand numbers over as floats, but
    # repeat(), topk() and slicing all require real ints.
    num_return_sequences = int(num_return_sequences)
    max_length = int(max_length)
    top_k = int(top_k)

    if num_return_sequences < 1:
        return ""

    prompt_ids = enc.encode(message or "")
    prompt = torch.tensor(prompt_ids, dtype=torch.long)  # (T,)
    # One copy of the prompt per requested sample: (B, T)
    x = prompt.unsqueeze(0).repeat(num_return_sequences, 1).to('cpu')

    torch.manual_seed(42)
    torch.cuda.manual_seed(42)  # silently ignored when CUDA is unavailable

    # With an empty prompt there is no last position to read logits from, so
    # sampling is skipped and every sample decodes to an empty string.
    if prompt_ids:
        with torch.no_grad():
            while x.size(1) < max_length:
                # First element of the model output is taken as the logits,
                # indexed as (B, T, vocab_size).
                logits = model(x)[0]
                # Only the prediction for the next token matters.
                last_logits = logits[:, -1, :]  # (B, vocab_size)
                # torch.softmax avoids depending on `F` leaking in through
                # the star import at the top of the file.
                probs = torch.softmax(last_logits, dim=-1)
                # torch.topk raises if k exceeds the dimension size.
                k = max(1, min(top_k, probs.size(-1)))
                topk_probs, topk_indices = torch.topk(probs, k, dim=-1)
                # multinomial does not require its input to sum to 1.
                choice = torch.multinomial(topk_probs, 1)  # (B, 1)
                # Map positions within the top-k back to vocabulary ids.
                next_ids = torch.gather(topk_indices, -1, choice)  # (B, 1)
                x = torch.cat((x, next_ids), dim=1)

    rows = x[:, :max_length].tolist()
    return "".join(">" + enc.decode(row) + "\n" for row in rows)


# Build the Gradio UI.  The input widgets are listed in the same order as the
# parameters of `response`, since Gradio passes their values positionally.
_SLIDER_SPECS = (
    # (label, minimum, maximum, value, step)
    ("num_return_sequences", 1, 5, 1, 1),
    ("max_length", 10, 150, 20, 5),
    ("top_k", 1, 100, 50, 1),
)

_input_widgets = [gr.Textbox(lines=5, label="message")]
_input_widgets.extend(
    gr.Slider(minimum=lo, maximum=hi, value=start, step=stride, label=caption)
    for caption, lo, hi, start, stride in _SLIDER_SPECS
)
_output_widget = gr.Textbox(label="Generated Text")

iface = gr.Interface(
    fn=response,
    inputs=_input_widgets,
    outputs=_output_widget,
    title="GPT Text Generator",
    description="Generate text using GPT-2 model with adjustable parameters.",
    # Only the prompt has an example value; the sliders keep their defaults.
    examples=[["Hello, I'm a language model"]],
)

# Start the web server for the interface.
iface.launch()