File size: 6,419 Bytes
efe7226 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
import gradio as gr
import numpy as np
import nltk
nltk.download('punkt')
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()
def tokenize(sentence):
return nltk.word_tokenize(sentence)
def stem(word):
return stemmer.stem(word.lower())
def bag_of_words(tokenized_sentence, words):
sentence_words = [stem(word) for word in tokenized_sentence]
bag = np.zeros(len(words), dtype=np.float32)
for idx, w in enumerate(words):
if w in sentence_words:
bag[idx] = 1
return bag
########### 2 ###########
import torch
import torch.nn as nn
class NeuralNet(nn.Module):
def __init__(self, input_size, hidden_size, num_classes):
super(NeuralNet, self).__init__()
self.l1 = nn.Linear(input_size, hidden_size)
self.l2 = nn.Linear(hidden_size, hidden_size)
self.l3 = nn.Linear(hidden_size, num_classes)
self.relu = nn.ReLU()
def forward(self, x):
out = self.l1(x)
out = self.relu(out)
out = self.l2(out)
out = self.relu(out)
out = self.l3(out)
return out
########### 3 ###########
import random
import json
from torch.utils.data import Dataset, DataLoader
path = 'intents_tweets.json'
with open(path, 'r') as f:
intents = json.load(f)
all_words = []
tags = []
xy = []
for intent in intents['intents']:
tag = intent['tag']
tags.append(tag)
for pattern in intent['patterns']:
w = tokenize(pattern)
all_words.extend(w)
xy.append((w, tag))
ignore_words = ['(',')','-',':',',',"'s",'!',':',"'","''",'--','.',':','?',';''[',']','``','o','’','“','”','”','[',';']
all_words = [stem(w) for w in all_words if w not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))
X_train = []
y_train = []
for (pattern_sentence, tag) in xy:
bag = bag_of_words(pattern_sentence, all_words)
X_train.append(bag)
label = tags.index(tag)
y_train.append(label)
X_train = np.array(X_train)
y_train = np.array(y_train)
# Hyper-parameters
num_epochs = 1000
batch_size = 8
learning_rate = 0.001
input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)
class ChatDataset(Dataset):
def __init__(self):
self.n_samples = len(X_train)
self.x_data = X_train
self.y_data = y_train
# support indexing such that dataset[i] can be used to get i-th sample
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
# we can call len(dataset) to return the size
def __len__(self):
return self.n_samples
#from model import NeuralNet
dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,batch_size=batch_size,shuffle=True,num_workers=2)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, output_size).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
for epoch in range(num_epochs):
for (words, labels) in train_loader:
words = words.to(device)
labels = labels.to(dtype=torch.long).to(device)
# Forward pass
outputs = model(words)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
data = {
"model_state": model.state_dict(),
"input_size": input_size,
"hidden_size": hidden_size,
"output_size": output_size,
"all_words": all_words,
"tags": tags
}
FILE = "data.pth"
torch.save(data, FILE)
import random
import string # to process standard python strings
import warnings # Hide the warnings
warnings.filterwarnings('ignore')
import json
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
with open('intents_tweets.json', 'r') as json_data:
intents = json.load(json_data)
FILE = "data.pth"
data = torch.load(FILE, map_location=torch.device('cpu'))
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]
model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()
bot_name = "WeASK"
from transformers import MBartForConditionalGeneration, MBart50Tokenizer
#model_name = "facebook/mbart-large-50-many-to-many-mmt"
#model = MBartForConditionalGeneration.from_pretrained(model_name)
#tokenizer = MBart50Tokenizer.from_pretrained(model_name)
import re, string, unicodedata
# import wikipedia as wk #pip install wikipedia
from collections import defaultdict
# def wikipedia_data(input_text):
# reg_ex = re.search('from wikipedia (.*)', input_text)#tell me about
# try:
# if reg_ex:
# topic = reg_ex.group(1)
# wiki = wk.summary(topic, sentences = 3)
# return wiki
# else:
# print("My apology, Can you please rephrase your query?")
# except Exception as e:
# print("I do not understand...Please rephrase")
def get_response(input_text):
#model_inputs = tokenizer(input_text, return_tensors="pt")
#generated_tokens = model.generate(**model_inputs,forced_bos_token_id=tokenizer.lang_code_to_id["en_XX"])
#translation= tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
#string2=" ".join(map(str,translation ))
sentence= tokenize(input_text)
X = bag_of_words(sentence, all_words)
X = X.reshape(1, X.shape[0])
X = torch.from_numpy(X).to(device)
output = model(X)
_, predicted = torch.max(output, dim=1)
tag = tags[predicted.item()]
probs = torch.softmax(output, dim=1)
prob = probs[0][predicted.item()]
if prob.item() > 0.75:
for intent in intents['intents']:
if tag == intent["tag"]:
return random.choice(intent['responses'])
else:
#if "From Wikipedia" in sentence:
#if sentence:
robo_response = wikipedia_data(input_text)
return robo_response
title = "WeASK: ChatBOT"
description = "Hi!! enter your query here"
# examples = [
# ["from wikipedia what is calculus"]
]
chatbot_demo = gr.Interface(fn=get_response, inputs = 'text',outputs='text',title = title, description = description, examples = examples)
chatbot_demo.launch() |