ipvikas committed on
Commit
efe7226
•
1 Parent(s): 9a83e2e

Create app.py

Files changed (1)
  1. app.py +225 -0
app.py ADDED
@@ -0,0 +1,225 @@
+ import gradio as gr
+ import numpy as np
+ import nltk
+ nltk.download('punkt')
+ from nltk.stem.porter import PorterStemmer
+ stemmer = PorterStemmer()
+
+ def tokenize(sentence):
+     return nltk.word_tokenize(sentence)
+
+ def stem(word):
+     return stemmer.stem(word.lower())
+
+ def bag_of_words(tokenized_sentence, words):
+     # 1.0 at index i if the stemmed vocabulary word words[i] occurs in the sentence
+     sentence_words = [stem(word) for word in tokenized_sentence]
+     bag = np.zeros(len(words), dtype=np.float32)
+     for idx, w in enumerate(words):
+         if w in sentence_words:
+             bag[idx] = 1
+     return bag
+
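+ # Illustrative example (hypothetical vocabulary): with words = ['hi', 'how', 'you'],
+ # bag_of_words(['hi', 'there'], words) returns array([1., 0., 0.], dtype=float32).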
+ ########### 2 ###########
+ import torch
+ import torch.nn as nn
+
+ class NeuralNet(nn.Module):
+     def __init__(self, input_size, hidden_size, num_classes):
+         super(NeuralNet, self).__init__()
+         self.l1 = nn.Linear(input_size, hidden_size)
+         self.l2 = nn.Linear(hidden_size, hidden_size)
+         self.l3 = nn.Linear(hidden_size, num_classes)
+         self.relu = nn.ReLU()
+
+     def forward(self, x):
+         out = self.l1(x)
+         out = self.relu(out)
+         out = self.l2(out)
+         out = self.relu(out)
+         out = self.l3(out)
+         return out
+
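+ # The network returns raw logits: nn.CrossEntropyLoss (used during training
+ # below) applies log-softmax internally, and torch.softmax is applied
+ # explicitly at inference time in get_response.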
+ ########### 3 ###########
+ import random
+ import json
+ from torch.utils.data import Dataset, DataLoader
+
+ path = 'intents_tweets.json'
+ with open(path, 'r') as f:
+     intents = json.load(f)
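+ # Assumed layout of intents_tweets.json, inferred from how `intents` is used below:
+ # {"intents": [{"tag": "...", "patterns": ["..."], "responses": ["..."]}, ...]}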
+
+ all_words = []
+ tags = []
+ xy = []
+
+ for intent in intents['intents']:
+     tag = intent['tag']
+     tags.append(tag)
+     for pattern in intent['patterns']:
+         w = tokenize(pattern)
+         all_words.extend(w)
+         xy.append((w, tag))
+
+ # Punctuation and stray tokens to drop before stemming
+ ignore_words = ['(', ')', '-', ':', ',', "'s", '!', "'", "''", '--', '.', '?',
+                 ';', '[', ']', '``', 'o', '’', '“', '”']
+ all_words = [stem(w) for w in all_words if w not in ignore_words]
+ all_words = sorted(set(all_words))
+ tags = sorted(set(tags))
+
+ X_train = []
+ y_train = []
+ for (pattern_sentence, tag) in xy:
+     bag = bag_of_words(pattern_sentence, all_words)
+     X_train.append(bag)
+     label = tags.index(tag)  # class index for CrossEntropyLoss
+     y_train.append(label)
+
+ X_train = np.array(X_train)
+ y_train = np.array(y_train)
+
+ # Hyper-parameters
+ num_epochs = 1000
+ batch_size = 8
+ learning_rate = 0.001
+ input_size = len(X_train[0])
+ hidden_size = 8
+ output_size = len(tags)
+
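+ # input_size is the vocabulary size (len(all_words)) and output_size the number
+ # of intent tags, so the network's shape follows directly from the training data.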
+ class ChatDataset(Dataset):
+     def __init__(self):
+         self.n_samples = len(X_train)
+         self.x_data = X_train
+         self.y_data = y_train
+
+     # support indexing such that dataset[i] can be used to get the i-th sample
+     def __getitem__(self, index):
+         return self.x_data[index], self.y_data[index]
+
+     # we can call len(dataset) to return the size
+     def __len__(self):
+         return self.n_samples
+
+ #from model import NeuralNet
+ dataset = ChatDataset()
+ train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=2)
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ model = NeuralNet(input_size, hidden_size, output_size).to(device)
+
+ # Loss and optimizer
+ criterion = nn.CrossEntropyLoss()
+ optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+
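+ # Note: training runs at import time, so every launch of the app repeats the
+ # full 1000-epoch loop before the interface starts. Guarding it with something
+ # like `if not os.path.exists("data.pth"): ...` (and `import os`) could skip
+ # retraining when a saved checkpoint is already present.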
+ # Train the model
+ for epoch in range(num_epochs):
+     for (words, labels) in train_loader:
+         words = words.to(device)
+         labels = labels.to(dtype=torch.long).to(device)
+
+         # Forward pass
+         outputs = model(words)
+         loss = criterion(outputs, labels)
+
+         # Backward and optimize
+         optimizer.zero_grad()
+         loss.backward()
+         optimizer.step()
+
+ data = {
+     "model_state": model.state_dict(),
+     "input_size": input_size,
+     "hidden_size": hidden_size,
+     "output_size": output_size,
+     "all_words": all_words,
+     "tags": tags
+ }
+
+ FILE = "data.pth"
+ torch.save(data, FILE)
+
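+ # data.pth bundles the trained weights together with the vocabulary and tag
+ # list, so inference below can rebuild both the model and its input encoding.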
+ import warnings  # hide warnings
+ warnings.filterwarnings('ignore')
+
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ with open('intents_tweets.json', 'r') as json_data:
+     intents = json.load(json_data)
+
+ FILE = "data.pth"
+ data = torch.load(FILE, map_location=torch.device('cpu'))
+
+ input_size = data["input_size"]
+ hidden_size = data["hidden_size"]
+ output_size = data["output_size"]
+ all_words = data['all_words']
+ tags = data['tags']
+ model_state = data["model_state"]
+
+ model = NeuralNet(input_size, hidden_size, output_size).to(device)
+ model.load_state_dict(model_state)
+ model.eval()
+
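+ # model.eval() disables training-time behaviour such as dropout; this network
+ # has none, so it is effectively a no-op here, but it is good practice before inference.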
+ bot_name = "WeASK"
+
+ # Optional MBart translation front end, currently disabled. Note that
+ # re-enabling it as written would rebind `model` over the intent classifier
+ # loaded above.
+ #from transformers import MBartForConditionalGeneration, MBart50Tokenizer
+ #model_name = "facebook/mbart-large-50-many-to-many-mmt"
+ #model = MBartForConditionalGeneration.from_pretrained(model_name)
+ #tokenizer = MBart50Tokenizer.from_pretrained(model_name)
+
+ import re, string, unicodedata
+ # import wikipedia as wk  # pip install wikipedia
+ from collections import defaultdict
+
+ # def wikipedia_data(input_text):
+ #     reg_ex = re.search('from wikipedia (.*)', input_text)  # e.g. "from wikipedia <topic>"
+ #     try:
+ #         if reg_ex:
+ #             topic = reg_ex.group(1)
+ #             wiki = wk.summary(topic, sentences=3)
+ #             return wiki
+ #         else:
+ #             print("My apologies, can you please rephrase your query?")
+ #     except Exception as e:
+ #         print("I do not understand... Please rephrase")
+
+
+ def get_response(input_text):
+     # Disabled MBart translation step (see the commented imports above):
+     #model_inputs = tokenizer(input_text, return_tensors="pt")
+     #generated_tokens = model.generate(**model_inputs, forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"])
+     #translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+     #string2 = " ".join(map(str, translation))
+
+     sentence = tokenize(input_text)
+     X = bag_of_words(sentence, all_words)
+     X = X.reshape(1, X.shape[0])  # batch dimension of 1
+     X = torch.from_numpy(X).to(device)
+
+     output = model(X)
+     _, predicted = torch.max(output, dim=1)
+
+     tag = tags[predicted.item()]
+
+     probs = torch.softmax(output, dim=1)
+     prob = probs[0][predicted.item()]
+     if prob.item() > 0.75:  # only answer when the classifier is confident
+         for intent in intents['intents']:
+             if tag == intent["tag"]:
+                 return random.choice(intent['responses'])
+     else:
+         # The Wikipedia fallback is commented out above; calling
+         # wikipedia_data here would raise a NameError, so return a
+         # fixed reply instead.
+         #robo_response = wikipedia_data(input_text)
+         return "I do not understand... Please rephrase your query."
+
+
+ title = "WeASK: ChatBOT"
+ description = "Hi!! Enter your query here."
+ examples = [
+     ["from wikipedia what is calculus"]
+ ]
+
+ chatbot_demo = gr.Interface(fn=get_response, inputs='text', outputs='text',
+                             title=title, description=description, examples=examples)
+ chatbot_demo.launch()