File size: 6,464 Bytes
628908b f2b1250 7025527 f2b1250 b8c19dd f2b1250 96c3f1b f2b1250 5443b1d f2b1250 89c38bf f2b1250 5443b1d f2b1250 5443b1d f2b1250 704ccbc f2b1250 4eeccb1 25e7910 bbeeaee 2e7e616 1ab4f04 33bdc2f 6796c61 33bdc2f 4eeccb1 7f2a0c1 faae5b2 7f2a0c1 7025527 f2b1250 0bc96cb f2b1250 997b1af 1ab4f04 997b1af 33bdc2f f2b1250 a7354ab a44a283 7025527 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
import gradio as gr
import numpy as np
import nltk
# Fetch the tokenizer data used by nltk.word_tokenize. Older NLTK releases load
# the pickled "punkt" models; newer releases load "punkt_tab" instead, so both
# are requested. nltk.download reports (rather than raises on) a package name
# that the installed NLTK index does not know.
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource)
from nltk.stem.porter import PorterStemmer

# Single shared Porter stemmer instance, used by stem().
stemmer = PorterStemmer()
def tokenize(sentence):
    """Return the token list that ``nltk.word_tokenize`` produces for *sentence*."""
    tokens = nltk.word_tokenize(sentence)
    return tokens
def stem(word):
    """Lower-case *word* and return its stem from the shared Porter stemmer."""
    lowered = word.lower()
    return stemmer.stem(lowered)
def bag_of_words(tokenized_sentence, words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        tokenized_sentence: Iterable of raw tokens; each one is passed
            through ``stem`` before comparison.
        words: Sequence of vocabulary entries that the stemmed tokens are
            compared against.

    Returns:
        A float32 NumPy array of length ``len(words)`` with 1 at every index
        whose vocabulary entry equals some stemmed token, and 0 elsewhere.
    """
    # Stem every token once and keep the results in a set, so each vocabulary
    # lookup below is a constant-time membership test instead of a list scan.
    present = {stem(token) for token in tokenized_sentence}
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, entry in enumerate(words):
        if entry in present:
            bag[idx] = 1
    return bag
########### 2 ###########
import torch
import torch.nn as nn
class NeuralNet(nn.Module):
    """Feed-forward classifier: three linear layers with ReLU after the first two.

    The layer attribute names (``l1``, ``l2``, ``l3``) are the keys of the
    module's ``state_dict`` and must stay stable so saved checkpoints keep
    loading.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Build the layers.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Width of both hidden layers.
            num_classes: Number of output scores per sample.
        """
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw (un-normalized) class scores for the batch *x*."""
        out = self.relu(self.l1(x))
        out = self.relu(self.l2(out))
        # No activation or softmax on the last layer: the output is raw scores.
        return self.l3(out)
########### 3 ###########
import random
import json
from torch.utils.data import Dataset, DataLoader
path = 'intents.json'
# Read the file as UTF-8 explicitly rather than relying on the platform's
# default encoding; the ignore list below contains non-ASCII quote characters,
# so the data is expected to contain them too.
with open(path, 'r', encoding='utf-8') as f:
    intents = json.load(f)

all_words = []  # every token from every pattern (stemmed and de-duplicated below)
tags = []       # one entry per intent (de-duplicated and sorted below)
xy = []         # (tokenized pattern, tag) pairs used to build the training set
for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        w = tokenize(pattern)
        all_words.extend(w)
        xy.append((w, tag))

# Tokens that are dropped from the vocabulary. A set gives constant-time
# membership tests; the previous list had duplicates and a missing comma that
# fused ';' and '[' into the single entry ';['.
ignore_words = {
    '(', ')', '-', '--', ':', ';', ',', '.', '!', '?',
    "'", "''", '``', "'s", '[', ']', 'o', '’', '“', '”',
}
all_words = [stem(w) for w in all_words if w not in ignore_words]
# Sorted, unique lists give every word and tag a stable integer position.
all_words = sorted(set(all_words))
tags = sorted(set(tags))
# Encode every (tokens, tag) pair: one bag-of-words row per pattern, and the
# tag's position in the tag list as its integer class label.
X_train = np.array([bag_of_words(tokens, all_words) for tokens, _ in xy])
y_train = np.array([tags.index(pattern_tag) for _, pattern_tag in xy])
# Hyper-parameters
num_epochs = 1_000
batch_size = 8
learning_rate = 1e-3
hidden_size = 8
# Network dimensions follow from the data: one input per bag-of-words column,
# one output per tag.
input_size = X_train.shape[1]
output_size = len(tags)
class ChatDataset(Dataset):
    """Index-addressable dataset of (feature row, label) pairs.

    Args:
        x_data: Indexable collection of feature rows. When omitted, the
            module-level ``X_train`` is used.
        y_data: Indexable collection of labels, aligned with ``x_data``.
            When omitted, the module-level ``y_train`` is used.

    Raises:
        ValueError: If the features and labels differ in length.
    """

    def __init__(self, x_data=None, y_data=None):
        # Falling back to the module-level arrays keeps ChatDataset() working
        # exactly as before.
        self.x_data = X_train if x_data is None else x_data
        self.y_data = y_train if y_data is None else y_data
        if len(self.x_data) != len(self.y_data):
            raise ValueError(
                f"features and labels differ in length: "
                f"{len(self.x_data)} != {len(self.y_data)}"
            )
        self.n_samples = len(self.x_data)

    def __getitem__(self, index):
        """Return the ``index``-th ``(features, label)`` pair, so dataset[i] works."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples, so len(dataset) works."""
        return self.n_samples
#from model import NeuralNet
dataset = ChatDataset()
# num_workers=0 loads batches in the main process. This script trains at module
# top level with no `if __name__ == "__main__":` guard, so worker subprocesses
# on spawn-based platforms would re-import (and re-run) the module. The
# in-memory dataset gains nothing from worker processes anyway.
train_loader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet(input_size, hidden_size, output_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
model.train()
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        # CrossEntropyLoss expects integer class indices of dtype long.
        labels = labels.to(dtype=torch.long).to(device)

        # Forward pass
        outputs = model(words)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Bundle the trained weights with the sizes, vocabulary and tag list, then
# write the bundle to disk.
FILE = "data.pth"
data = dict(
    model_state=model.state_dict(),
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    all_words=all_words,
    tags=tags,
)
torch.save(data, FILE)
import random
import string # to process standard python strings
import warnings # Hide the warnings
# Silence all Python warnings for the rest of the process.
warnings.filterwarnings('ignore')
import json

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Read the file as UTF-8 explicitly rather than relying on the platform's
# default encoding.
with open('intents.json', 'r', encoding='utf-8') as json_data:
    intents = json.load(json_data)

FILE = "data.pth"
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted
# source (this one is written by the training step earlier in this script).
data = torch.load(FILE, map_location=torch.device('cpu'))

input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

# Rebuild the network with the saved sizes, restore its weights, and switch it
# to evaluation mode for inference.
model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()

bot_name = "WeASK"
from transformers import MBartForConditionalGeneration, MBart50Tokenizer
#model_name = "facebook/mbart-large-50-many-to-many-mmt"
#model = MBartForConditionalGeneration.from_pretrained(model_name)
#tokenizer = MBart50Tokenizer.from_pretrained(model_name)
import re, string, unicodedata
import wikipedia as wk #pip install wikipedia
from collections import defaultdict
def wikipedia_data(input_text):
    """Answer a ``from wikipedia <topic>`` query with a short Wikipedia summary.

    The phrase is matched case-insensitively, so "From Wikipedia ..." (the
    form shown in the UI description) and "from wikipedia ..." both work.

    Args:
        input_text: The raw user query.

    Returns:
        A three-sentence summary of the topic on success. If the query does
        not contain the phrase or names no topic, or if the lookup fails, a
        short message asking the user to rephrase is returned (not printed),
        so the caller always has text to show.
    """
    match = re.search(r'from wikipedia (.*)', input_text, flags=re.IGNORECASE)
    topic = match.group(1).strip() if match else ""
    if not topic:
        return "My apology, Can you please rephrase your query?"
    try:
        return wk.summary(topic, sentences=3)
    except Exception:
        # Deliberate best-effort boundary: any lookup failure becomes a
        # user-facing message instead of an error in the UI.
        return "I do not understand...Please rephrase"
def get_response(input_text):
    """Produce the chatbot's reply to *input_text*.

    The text is tokenized, encoded as a bag-of-words vector and classified by
    the intent model. If the top class has softmax probability above 0.75 and
    an intent with that tag exists, one of its canned responses is chosen at
    random. In every other case the query is handed to ``wikipedia_data``.

    Args:
        input_text: The raw user query.

    Returns:
        The reply text.
    """
    sentence = tokenize(input_text)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])  # single-sample batch
    X = torch.from_numpy(X).to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(X)
        probs = torch.softmax(output, dim=1)

    predicted = torch.argmax(output, dim=1).item()
    tag = tags[predicted]
    prob = probs[0][predicted].item()

    if prob > 0.75:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                return random.choice(intent['responses'])

    # Low confidence, or no intent carries the predicted tag: fall back to the
    # Wikipedia lookup rather than returning nothing.
    return wikipedia_data(input_text)
# Gradio front-end: a text-in / text-out interface around get_response.
title = "WeASK: ChatBOT"
description = "Hi!! enter your query or to get answers from Wikipedia, write like 'From Wikipedia <your query>'... See examples."
examples = [
    ["from wikipedia what is calculus"]
]
chatbot_demo = gr.Interface(
    fn=get_response,
    inputs='text',
    outputs='text',
    title=title,
    description=description,
    examples=examples,
)
# The stray trailing "|" after this call was not valid Python and is removed.
chatbot_demo.launch()