Spaces:

ipvikas
/

weASK

Runtime error

App Files Files Community

ipvikas committed on Sep 4, 2022

Commit

5443b1d

•

1 Parent(s): 740eb29

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -144

app.py CHANGED Viewed

@@ -1,16 +1,4 @@
-# -*- coding: utf-8 -*-
-"""app.ipynb
-Automatically generated by Colaboratory.
-Original file is located at
-    https://colab.research.google.com/drive/1Z_cMyllUfHf2lYtUtdS1ggVMpLCLg0-j
-"""
 import gradio as gr
-###########  1  ###########
-#intents.json --> nltk_utils.py -->  model.py --> train.ipynb --> chat.ipynb
 import numpy as np
 import nltk
 nltk.download('punkt')
@@ -18,53 +6,23 @@ from nltk.stem.porter import PorterStemmer
 stemmer = PorterStemmer()
 def tokenize(sentence):
-    """
-    split sentence into array of words/tokens
-    a token can be a word or punctuation character, or number
-    """
     return nltk.word_tokenize(sentence)
-# print(tokenize('Hello how are you'))
 def stem(word):
-    """
-    stemming = find the root form of the word
-    examples:
-    words = ["organize", "organizes", "organizing"]
-    words = [stem(w) for w in words]
-    -> ["organ", "organ", "organ"]
-    """
     return stemmer.stem(word.lower())
-# print(stem('organize'))
 def bag_of_words(tokenized_sentence, words):
-    """
-    return bag of words array:
-    1 for each known word that exists in the sentence, 0 otherwise
-    example:
-    sentence = ["hello", "how", "are", "you"]
-    words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
-    bog   = [  0 ,    1 ,    0 ,   1 ,    0 ,    0 ,      0]
-    """
-    # stem each word
     sentence_words = [stem(word) for word in tokenized_sentence]
-    # initialize bag with 0 for each word
     bag = np.zeros(len(words), dtype=np.float32)
     for idx, w in enumerate(words):
         if w in sentence_words:
             bag[idx] = 1
     return bag
-# print(bag_of_words('Hello how are you', 'hi'))
 ###########  2  ###########
 import torch
 import torch.nn as nn
 class NeuralNet(nn.Module):
     def __init__(self, input_size, hidden_size, num_classes):
         super(NeuralNet, self).__init__()
@@ -79,85 +37,39 @@ class NeuralNet(nn.Module):
         out = self.l2(out)
         out = self.relu(out)
         out = self.l3(out)
-        # no activation and no softmax at the end
         return out
 ###########  3  ###########
-import numpy as np
 import random
 import json
-import torch
-import torch.nn as nn
 from torch.utils.data import Dataset, DataLoader
-#2. Loading our JSON Data
-#from google.colab import drive #commented
-#drive.mount('/content/drive')  #commented
-# Commented out IPython magic to ensure Python compatibility.
-# %cd '/content/drive/My Drive/Colab Notebooks/NLP/ChatBot/'
-#path = '/content/drive/My Drive/Colab Notebooks/NLP/ChatBot/intents.json'
-#!pwd
-import json
 path = 'intents.json'
 with open(path, 'r') as f:
     intents = json.load(f)
-# print(intents)
-# Commented out IPython magic to ensure Python compatibility.
-# %cd '/content/drive/My Drive/Colab Notebooks/NLP/ChatBot/intents.json'
-# Commented out IPython magic to ensure Python compatibility.
-# %pwd
-#!ls
-import nltk
-nltk.download('punkt')
-#from nltk_utils import bag_of_words, tokenize, stem
 all_words = []
 tags = []
 xy = []
-# loop through each sentence in our intents patterns
 for intent in intents['intents']:
     tag = intent['tag']
-    # add to tag list
     tags.append(tag)
     for pattern in intent['patterns']:
-        # tokenize each word in the sentence
         w = tokenize(pattern)
-        # add to our words list
         all_words.extend(w)
-        # add to xy pair
         xy.append((w, tag))
-# stem and lower each word
-# ignore_words = ['?', '.', '!']
 ignore_words = ['(',')','-',':',',',"'s",'!',':',"'","''",'--','.',':','?',';''[',']','``','o','’','“','”','”','[',';']
 all_words = [stem(w) for w in all_words if w not in ignore_words]
-# remove duplicates and sort
 all_words = sorted(set(all_words))
 tags = sorted(set(tags))
-#print(len(xy), "patterns") #commented
-#print(len(tags), "tags:", tags) #commented
-#print(len(all_words), "unique stemmed words:", all_words) #commented
-# create training data
 X_train = []
 y_train = []
 for (pattern_sentence, tag) in xy:
-    # X: bag of words for each pattern_sentence
     bag = bag_of_words(pattern_sentence, all_words)
     X_train.append(bag)
-    # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
     label = tags.index(tag)
     y_train.append(label)
@@ -171,10 +83,8 @@ learning_rate = 0.001
 input_size = len(X_train[0])
 hidden_size = 8
 output_size = len(tags)
-#print(input_size, output_size) #commented
 class ChatDataset(Dataset):
     def __init__(self):
         self.n_samples = len(X_train)
         self.x_data = X_train
@@ -188,17 +98,12 @@ class ChatDataset(Dataset):
     def __len__(self):
         return self.n_samples
-import torch
-import torch.nn as nn
 #from model import NeuralNet
 dataset = ChatDataset()
 train_loader = DataLoader(dataset=dataset,batch_size=batch_size,shuffle=True,num_workers=2)
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = NeuralNet(input_size, hidden_size, output_size).to(device)
 # Loss and optimizer
 criterion = nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
@@ -208,23 +113,15 @@ for epoch in range(num_epochs):
     for (words, labels) in train_loader:
         words = words.to(device)
         labels = labels.to(dtype=torch.long).to(device)
         # Forward pass
         outputs = model(words)
-        # if y would be one-hot, we must apply
-        # labels = torch.max(labels, 1)[1]
         loss = criterion(outputs, labels)
         # Backward and optimize
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-    #if (epoch+1) % 100 == 0:
-        #print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
-#print(f'final loss: {loss.item():.4f}')#commented
 data = {
 "model_state": model.state_dict(),
@@ -238,25 +135,13 @@ data = {
 FILE = "data.pth"
 torch.save(data, FILE)
-#print(f'training complete. file saved to {FILE}') #commented
 import random
 import string # to process standard python strings
 import warnings # Hide the warnings
 warnings.filterwarnings('ignore')
-import torch
-import nltk
-nltk.download('punkt')
-import random
 import json
-import torch
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 with open('intents.json', 'r') as json_data:
@@ -278,17 +163,7 @@ model.eval()
 bot_name = "WeASK"
-###removed
 from transformers import MBartForConditionalGeneration, MBart50Tokenizer
-#def download_model():
-#model, tokenizer = download_model()
-################################
 #model_name = "facebook/mbart-large-50-many-to-many-mmt"
 #model = MBartForConditionalGeneration.from_pretrained(model_name)
 #tokenizer = MBart50Tokenizer.from_pretrained(model_name)
@@ -299,22 +174,6 @@ def get_response(input_text):
     #translation= tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
     #string2=" ".join(map(str,translation ))
-    #print("Let's chat! (type 'quit' to exit)")
-    #while True:
-    # sentence = "do you use credit cards?"
-        #try:
-            #sentence= input("You: ")
-            #if sentence== "Quit":
-                #break
-        #except EOFError as e:
-            #print(end="")
-    #if sentence== "quit":
-        #break
     sentence= tokenize(input_text)
     X = bag_of_words(sentence, all_words)
     X = X.reshape(1, X.shape[0])

 import gradio as gr
 import numpy as np
 import nltk
 nltk.download('punkt')
 stemmer = PorterStemmer()
 def tokenize(sentence):
     """Split ``sentence`` into a list of tokens via ``nltk.word_tokenize``."""
     tokens = nltk.word_tokenize(sentence)
     return tokens
 def stem(word):
     """Return the stem of ``word``, lower-casing it before stemming."""
     lowered = word.lower()
     return stemmer.stem(lowered)
 def bag_of_words(tokenized_sentence, words):
     """Encode a tokenized sentence as a binary bag-of-words vector.

     Args:
         tokenized_sentence: iterable of raw token strings; each one is run
             through ``stem`` before matching.
         words: sequence of vocabulary entries, compared as-is against the
             stemmed tokens.

     Returns:
         A float32 NumPy array of length ``len(words)`` with 1 at every
         position whose vocabulary entry occurs in the stemmed sentence and
         0 everywhere else.
     """
     # Stem once and keep the result in a set so that each vocabulary lookup
     # is a hash probe rather than a scan over the whole sentence.
     sentence_words = {stem(word) for word in tokenized_sentence}
     bag = np.zeros(len(words), dtype=np.float32)
     for idx, w in enumerate(words):
         if w in sentence_words:
             bag[idx] = 1
     return bag
 ###########  2  ###########
 import torch
 import torch.nn as nn
 class NeuralNet(nn.Module):
     def __init__(self, input_size, hidden_size, num_classes):
         super(NeuralNet, self).__init__()
         out = self.l2(out)
         out = self.relu(out)
         out = self.l3(out)
         return out
 ###########  3  ###########
 import random
 import json
 from torch.utils.data import Dataset, DataLoader
 # Load the intent definitions that supply both the vocabulary and the labels.
 path = 'intents.json'
 with open(path, 'r') as f:
     intents = json.load(f)
 all_words = []
 tags = []
 xy = []
 # Collect every tag, every token, and each (tokens, tag) training pair.
 for intent in intents['intents']:
     tag = intent['tag']
     tags.append(tag)
     for pattern in intent['patterns']:
         w = tokenize(pattern)
         all_words.extend(w)
         xy.append((w, tag))
 # Tokens excluded from the vocabulary. ';' and '[' are separate entries: a
 # missing comma between adjacent literals would fuse them into the single
 # string ';[' through implicit concatenation. Duplicates are listed once.
 ignore_words = [
     '(', ')', '-', ':', ',', "'s", '!', "'", "''", '--', '.', '?', ';',
     '[', ']', '``', 'o', '’', '“', '”',
 ]
 # Filter on the raw token, then stem, dedupe and sort for a stable ordering.
 all_words = [stem(w) for w in all_words if w not in ignore_words]
 all_words = sorted(set(all_words))
 tags = sorted(set(tags))
 X_train = []
 y_train = []
 for (pattern_sentence, tag) in xy:
     # X: bag-of-words vector for the pattern over the final vocabulary.
     bag = bag_of_words(pattern_sentence, all_words)
     X_train.append(bag)
     # y: the tag's position in the sorted tag list (a class index, not one-hot).
     label = tags.index(tag)
     y_train.append(label)
 input_size = len(X_train[0])
 hidden_size = 8
 output_size = len(tags)
 class ChatDataset(Dataset):
     def __init__(self):
         self.n_samples = len(X_train)
         self.x_data = X_train
     def __len__(self):
         return self.n_samples
 #from model import NeuralNet
 dataset = ChatDataset()
 train_loader = DataLoader(dataset=dataset,batch_size=batch_size,shuffle=True,num_workers=2)
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = NeuralNet(input_size, hidden_size, output_size).to(device)
 # Loss and optimizer
 criterion = nn.CrossEntropyLoss()
 optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
     for (words, labels) in train_loader:
         words = words.to(device)
         labels = labels.to(dtype=torch.long).to(device)
         # Forward pass
         outputs = model(words)
         loss = criterion(outputs, labels)
         # Backward and optimize
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
 data = {
 "model_state": model.state_dict(),
 FILE = "data.pth"
 torch.save(data, FILE)
 import random
 import string # to process standard python strings
 import warnings # Hide the warnings
 warnings.filterwarnings('ignore')
 import json
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 with open('intents.json', 'r') as json_data:
 bot_name = "WeASK"
 from transformers import MBartForConditionalGeneration, MBart50Tokenizer
 #model_name = "facebook/mbart-large-50-many-to-many-mmt"
 #model = MBartForConditionalGeneration.from_pretrained(model_name)
 #tokenizer = MBart50Tokenizer.from_pretrained(model_name)
     #translation= tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
     #string2=" ".join(map(str,translation ))
     sentence= tokenize(input_text)
     X = bag_of_words(sentence, all_words)
     X = X.reshape(1, X.shape[0])