ABDULHODIY committed on
Commit
9e151d0
1 Parent(s): 69cf26c

Upload 4 files

Browse files
Files changed (4) hide show
  1. chat_nitro.py +80 -0
  2. summator_model.nit +0 -0
  3. vocab.json +0 -0
  4. vocab2.json +0 -0
chat_nitro.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import torch
3
+ from torch.utils.data import DataLoader, Dataset
4
+ import torch.nn as nn
5
+
6
+ # Initialize tokenizer
7
class CustomTokenizer:
    """Whitespace tokenizer backed by a token -> id vocabulary mapping.

    The vocabulary is expected to contain the special tokens ``'[PAD]'``
    (read by ``pad_sequence`` and ``decode``) and ``'[UNK]'`` (read by
    ``encode``).
    """

    def __init__(self, vocab):
        """Store *vocab*, a dict mapping token strings to integer ids."""
        self.vocab = vocab

    def encode(self, text):
        """Split *text* on whitespace and return the list of token ids.

        Tokens missing from the vocabulary map to the ``'[UNK]'`` id.

        Raises:
            KeyError: If the vocabulary has no ``'[UNK]'`` entry.
        """
        unk_id = self.vocab['[UNK]']
        return [self.vocab.get(token, unk_id) for token in text.split()]

    def decode(self, ids):
        """Map *ids* back to tokens and join them with single spaces.

        Padding ids and ids that have no vocabulary entry are skipped.

        Raises:
            KeyError: If the vocabulary has no ``'[PAD]'`` entry.
        """
        pad_id = self.vocab['[PAD]']
        # Invert the mapping by id *value*. Indexing list(vocab.keys()) by
        # position is only right when every id equals its insertion index,
        # lets negative ids wrap around, and rebuilds the list for each id.
        id_to_token = {index: token for token, index in self.vocab.items()}
        return ' '.join(
            id_to_token[index]
            for index in ids
            if index != pad_id and index in id_to_token
        )

    def pad_sequence(self, sequence, max_length):
        """Return *sequence* padded with ``'[PAD]'`` ids or cut to *max_length*.

        Padding is appended on the right; longer sequences keep their first
        *max_length* items. A new list is returned in both cases.
        """
        missing = max_length - len(sequence)
        if missing > 0:
            return sequence + [self.vocab['[PAD]']] * missing
        return sequence[:max_length]
26
+
27
+ # Sample language model class
28
class LanguageModel(nn.Module):
    """Embedding -> GRU -> linear model that scores every vocabulary entry.

    Args:
        vocab_size: Number of embedding rows and of output scores.
        embed_size: Width of each token embedding (the GRU input size).
        hidden_size: Width of the GRU hidden state.
    """

    def __init__(self, vocab_size, embed_size, hidden_size):
        super().__init__()
        # Attribute names and creation order determine the state_dict keys
        # and the seeded initialisation, so both are kept fixed.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size, embedding_dim=embed_size
        )
        self.rnn = nn.GRU(
            input_size=embed_size, hidden_size=hidden_size, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=vocab_size)

    def forward(self, x, hidden=None):
        """Return ``(scores, hidden)`` for the token ids in *x*.

        *x* is embedded, passed through the GRU together with the optional
        initial *hidden* state, and each GRU output is projected to
        ``vocab_size`` scores. The GRU is built with ``batch_first=True``,
        so a batched *x* carries the batch on dim 0.
        """
        recurrent_out, hidden = self.rnn(self.embedding(x), hidden)
        return self.fc(recurrent_out), hidden
40
+
41
# Load the vocab from the JSON file.
# The encoding is pinned so non-ASCII tokens are read identically on every
# platform instead of depending on the locale's default codec.
with open('vocab2.json', 'r', encoding='utf-8') as f:
    vocab = json.load(f)

# Ensure the special tokens the tokenizer looks up are present; a missing
# one is added with id len(vocab).
special_tokens = ['[PAD]', '[UNK]']
for token in special_tokens:
    if token not in vocab:
        vocab[token] = len(vocab)

tokenizer = CustomTokenizer(vocab)

# Model parameters
embed_size = 900
hidden_size = 900
# Sized from the largest id (not len(vocab)) so every id in the vocabulary
# has an embedding row even if the ids are not contiguous.
vocab_size = max(vocab.values()) + 1

# Load the model
model = LanguageModel(vocab_size, embed_size, hidden_size)
# NOTE(review): the commit this script arrived in lists 'summator_model.nit',
# not 'language_model.nit' -- confirm the checkpoint name is correct.
# NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
# map_location='cpu' lets a checkpoint saved on a GPU load on CPU-only hosts;
# the model itself is created on the CPU.
model.load_state_dict(torch.load('language_model.nit', map_location='cpu'))
model.eval()
61
+
62
def generate_response(input_text, model, tokenizer, max_length=1000):
    """Run *model* once over the padded input and decode its arg-max ids.

    Args:
        input_text: Raw text; encoded with ``tokenizer.encode``.
        model: Callable that takes a ``(1, max_length)`` tensor of ids and
            returns ``(scores, hidden)``, with the vocabulary on dim 2 of
            ``scores``.
        tokenizer: Object providing ``encode``, ``pad_sequence`` and
            ``decode``.
        max_length: Length the encoded input is padded or truncated to.

    Returns:
        The text decoded from the highest-scoring id at every position.
    """
    encoded_input = tokenizer.encode(input_text)
    padded_input = tokenizer.pad_sequence(encoded_input, max_length)
    if not padded_input:
        # Nothing to feed the model (e.g. max_length == 0): an empty list
        # would also become a float tensor that cannot index an embedding.
        return tokenizer.decode([])

    # Explicit integer dtype; unsqueeze(0) adds the batch dimension.
    input_tensor = torch.tensor(padded_input, dtype=torch.long).unsqueeze(0)

    with torch.no_grad():
        outputs, _ = model(input_tensor)

    # squeeze(0) removes only the batch dimension. A bare squeeze() would
    # also drop a length-1 sequence dimension, making tolist() return an int
    # that decode cannot iterate.
    predicted_ids = torch.argmax(outputs, dim=2).squeeze(0).tolist()
    return tokenizer.decode(predicted_ids)
74
+
75
# Interactive loop: read a prompt, run the model on it, print the result.
while True:
    try:
        test_text = input(">>>")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt (or exhausted piped stdin) ends the
        # session cleanly instead of with a traceback.
        print()
        break
    response = generate_response(test_text, model, tokenizer)
    print("Input:", test_text)
    print("Response:", response)
summator_model.nit ADDED
Binary file (4.71 kB). View file
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab2.json ADDED
The diff for this file is too large to render. See raw diff