ipvikas committed
Commit 5443b1d
1 Parent(s): 740eb29

Update app.py

Files changed (1)
  1. app.py +3 -144
app.py CHANGED
@@ -1,16 +1,4 @@
-# -*- coding: utf-8 -*-
-"""app.ipynb
-
-Automatically generated by Colaboratory.
-
-Original file is located at
-    https://colab.research.google.com/drive/1Z_cMyllUfHf2lYtUtdS1ggVMpLCLg0-j
-"""
 import gradio as gr
-########### 1 ###########
-
-
-#intents.json --> nltk_utils.py --> model.py --> train.ipynb --> chat.ipynb
 import numpy as np
 import nltk
 nltk.download('punkt')
@@ -18,53 +6,23 @@ from nltk.stem.porter import PorterStemmer
 stemmer = PorterStemmer()
 
 def tokenize(sentence):
-    """
-    split sentence into array of words/tokens
-    a token can be a word or punctuation character, or number
-    """
     return nltk.word_tokenize(sentence)
 
-# print(tokenize('Hello how are you'))
-
 def stem(word):
-    """
-    stemming = find the root form of the word
-    examples:
-    words = ["organize", "organizes", "organizing"]
-    words = [stem(w) for w in words]
-    -> ["organ", "organ", "organ"]
-    """
     return stemmer.stem(word.lower())
 
-# print(stem('organize'))
-
 def bag_of_words(tokenized_sentence, words):
-    """
-    return bag of words array:
-    1 for each known word that exists in the sentence, 0 otherwise
-    example:
-    sentence = ["hello", "how", "are", "you"]
-    words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
-    bog = [ 0 , 1 , 0 , 1 , 0 , 0 , 0]
-    """
-    # stem each word
     sentence_words = [stem(word) for word in tokenized_sentence]
-    # initialize bag with 0 for each word
     bag = np.zeros(len(words), dtype=np.float32)
     for idx, w in enumerate(words):
         if w in sentence_words:
             bag[idx] = 1
-
     return bag
 
-# print(bag_of_words('Hello how are you', 'hi'))
-
 ########### 2 ###########
-
 import torch
 import torch.nn as nn
 
-
 class NeuralNet(nn.Module):
     def __init__(self, input_size, hidden_size, num_classes):
         super(NeuralNet, self).__init__()
@@ -79,85 +37,39 @@ class NeuralNet(nn.Module):
         out = self.l2(out)
         out = self.relu(out)
         out = self.l3(out)
-        # no activation and no softmax at the end
         return out
 
 ########### 3 ###########
-import numpy as np
 import random
 import json
-
-import torch
-import torch.nn as nn
 from torch.utils.data import Dataset, DataLoader
 
-#2. Loading our JSON Data
-#from google.colab import drive #commented
-#drive.mount('/content/drive') #commented
-
-# Commented out IPython magic to ensure Python compatibility.
-# %cd '/content/drive/My Drive/Colab Notebooks/NLP/ChatBot/'
-
-#path = '/content/drive/My Drive/Colab Notebooks/NLP/ChatBot/intents.json'
-
-#!pwd
-
-import json
 path = 'intents.json'
 with open(path, 'r') as f:
     intents = json.load(f)
 
-# print(intents)
-
-# Commented out IPython magic to ensure Python compatibility.
-# %cd '/content/drive/My Drive/Colab Notebooks/NLP/ChatBot/intents.json'
-
-# Commented out IPython magic to ensure Python compatibility.
-# %pwd
-
-#!ls
-
-import nltk
-nltk.download('punkt')
-
-#from nltk_utils import bag_of_words, tokenize, stem
-
 all_words = []
 tags = []
 xy = []
-# loop through each sentence in our intents patterns
+
 for intent in intents['intents']:
     tag = intent['tag']
-    # add to tag list
     tags.append(tag)
     for pattern in intent['patterns']:
-        # tokenize each word in the sentence
         w = tokenize(pattern)
-        # add to our words list
         all_words.extend(w)
-        # add to xy pair
         xy.append((w, tag))
 
-# stem and lower each word
-# ignore_words = ['?', '.', '!']
 ignore_words = ['(',')','-',':',',',"'s",'!',':',"'","''",'--','.',':','?',';''[',']','``','o','’','“','”','”','[',';']
 all_words = [stem(w) for w in all_words if w not in ignore_words]
-# remove duplicates and sort
 all_words = sorted(set(all_words))
 tags = sorted(set(tags))
 
-#print(len(xy), "patterns") #commented
-#print(len(tags), "tags:", tags) #commented
-#print(len(all_words), "unique stemmed words:", all_words) #commented
-
-# create training data
 X_train = []
 y_train = []
 for (pattern_sentence, tag) in xy:
-    # X: bag of words for each pattern_sentence
     bag = bag_of_words(pattern_sentence, all_words)
     X_train.append(bag)
-    # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
     label = tags.index(tag)
     y_train.append(label)
 
@@ -171,10 +83,8 @@ learning_rate = 0.001
 input_size = len(X_train[0])
 hidden_size = 8
 output_size = len(tags)
-#print(input_size, output_size) #commented
 
 class ChatDataset(Dataset):
-
     def __init__(self):
         self.n_samples = len(X_train)
         self.x_data = X_train
@@ -188,17 +98,12 @@ class ChatDataset(Dataset):
     def __len__(self):
         return self.n_samples
 
-import torch
-import torch.nn as nn
-
 #from model import NeuralNet
-
 dataset = ChatDataset()
 train_loader = DataLoader(dataset=dataset,batch_size=batch_size,shuffle=True,num_workers=2)
-
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
 model = NeuralNet(input_size, hidden_size, output_size).to(device)
+
 # Loss and optimizer
 criterion = nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
@@ -208,23 +113,15 @@ for epoch in range(num_epochs):
     for (words, labels) in train_loader:
         words = words.to(device)
         labels = labels.to(dtype=torch.long).to(device)
-
+
         # Forward pass
         outputs = model(words)
-        # if y would be one-hot, we must apply
-        # labels = torch.max(labels, 1)[1]
         loss = criterion(outputs, labels)
 
         # Backward and optimize
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-
-    #if (epoch+1) % 100 == 0:
-        #print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
-
-
-#print(f'final loss: {loss.item():.4f}')#commented
 
 data = {
     "model_state": model.state_dict(),
@@ -238,25 +135,13 @@ data = {
 FILE = "data.pth"
 torch.save(data, FILE)
 
-#print(f'training complete. file saved to {FILE}') #commented
-
-
 import random
 import string # to process standard python strings
 
 import warnings # Hide the warnings
 warnings.filterwarnings('ignore')
 
-import torch
-
-import nltk
-nltk.download('punkt')
-
-import random
 import json
-
-import torch
-
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
 with open('intents.json', 'r') as json_data:
@@ -278,17 +163,7 @@ model.eval()
 
 bot_name = "WeASK"
 
-###removed
 from transformers import MBartForConditionalGeneration, MBart50Tokenizer
-
-#def download_model():
-
-
-#model, tokenizer = download_model()
-
-
-
-################################
 #model_name = "facebook/mbart-large-50-many-to-many-mmt"
 #model = MBartForConditionalGeneration.from_pretrained(model_name)
 #tokenizer = MBart50Tokenizer.from_pretrained(model_name)
@@ -299,22 +174,6 @@ def get_response(input_text):
     #translation= tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
 
     #string2=" ".join(map(str,translation ))
-
-
-
-
-    #print("Let's chat! (type 'quit' to exit)")
-    #while True:
-    # sentence = "do you use credit cards?"
-    #try:
-    #sentence= input("You: ")
-    #if sentence== "Quit":
-    #break
-    #except EOFError as e:
-    #print(end="")
-    #if sentence== "quit":
-    #break
-
     sentence= tokenize(input_text)
     X = bag_of_words(sentence, all_words)
     X = X.reshape(1, X.shape[0])
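
Note: the docstrings deleted in the first hunks documented the preprocessing helpers. For reference, the worked example from the removed bag_of_words docstring can be reproduced with the short sketch below; it reuses the same stem/bag_of_words logic that app.py keeps, and only the sample sentence/words lists are taken from that deleted docstring.

import numpy as np
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()

def stem(word):
    return stemmer.stem(word.lower())

def bag_of_words(tokenized_sentence, words):
    # 1.0 for each known (stemmed) word present in the sentence, 0.0 otherwise
    sentence_words = [stem(w) for w in tokenized_sentence]
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, w in enumerate(words):
        if w in sentence_words:
            bag[idx] = 1
    return bag

sentence = ["hello", "how", "are", "you"]
words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
print(bag_of_words(sentence, words))  # -> [0. 1. 0. 1. 0. 0. 0.]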
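
The diff ends inside get_response, so the remainder of that function is not shown here. As orientation only, a typical completion of this bag-of-words intent classifier looks like the sketch below; model, device, tags, all_words, intents, tokenize and bag_of_words are the names defined earlier in app.py, while the 0.75 confidence threshold, the fallback reply and the intent['responses'] key are assumptions rather than the file's actual code.

import random
import torch

def get_response(input_text):
    sentence = tokenize(input_text)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X).to(device)

    output = model(X)                         # raw logits from NeuralNet
    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]              # predicted intent tag

    probs = torch.softmax(output, dim=1)
    prob = probs[0][predicted.item()]         # confidence of the prediction

    if prob.item() > 0.75:                    # assumed threshold, not from the diff
        for intent in intents['intents']:
            if tag == intent['tag']:
                return random.choice(intent['responses'])  # assumes a 'responses' key in intents.json
    return "I do not understand..."           # assumed fallback reply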