File size: 3,653 Bytes
a0c7d15
 
 
 
6514b63
 
 
 
 
5397177
 
3e11b23
 
6514b63
 
 
 
 
 
 
 
 
 
 
 
 
a0c7d15
 
6514b63
 
 
 
5397177
6514b63
 
 
a0c7d15
6514b63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0c7d15
 
 
6514b63
a0c7d15
6514b63
 
3e11b23
6514b63
 
a0c7d15
 
3e11b23
6514b63
5397177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
984fad2
5397177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6514b63
 
 
 
 
5397177
6514b63
5397177
6514b63
 
 
 
 
 
 
 
 
 
5397177
6514b63
 
 
7c11fe0
6514b63
 
 
0acf6d0
b746c0c
a0c7d15
d1c6a4d
a0c7d15
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import pyjokes
import gradio as gr
import numpy as np
from nltk.corpus import wordnet as wn
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

import nltk
# NOTE(review): downloading *all* NLTK corpora at import time is very slow
# and network-heavy; NLTK() below downloads the only two it needs
# (punkt, wordnet) — confirm whether this blanket download can be removed.
nltk.download('all')
import string
from sklearn.feature_extraction.text import TfidfVectorizer
# import fastai


def similarity(input, joke):
    """Return the cosine-similarity matrix between the two embedding arrays."""
    sim_matrix = cosine_similarity(input, joke)
    return sim_matrix


def get_best(input):
    """Return the pyjokes joke most semantically similar to *input*.

    Embeds the query and every English joke with a BERT sentence encoder
    and picks the joke with the highest cosine similarity.

    Returns the best joke suffixed with '😁🤣', or None when no jokes
    are available.
    """
    jokes = pyjokes.get_jokes(language='en', category='all')
    if not jokes:
        return None
    model = SentenceTransformer('bert-base-nli-mean-tokens')
    jokes_embedding = model.encode(jokes)
    input_embedding = model.encode(input)
    # Bug fix: the original reshaped each embedding with reshape(-1, 1),
    # which makes cosine_similarity treat every scalar component as a
    # separate 1-D sample — the resulting (dim x dim) matrix of ±1 values
    # is meaningless.  Embeddings must be row vectors: query (1, dim)
    # against jokes (n_jokes, dim) yields one true similarity per joke.
    sims = cosine_similarity(np.asarray(input_embedding).reshape(1, -1),
                             np.asarray(jokes_embedding))[0]
    max_idx = int(np.argmax(sims))
    return jokes[max_idx] + '😁🤣'


def generate_list(input):
    """Return every contiguous substring of *input* with length >= 2.

    Substrings are ordered by length (shortest first), then by start
    position — identical ordering to the original triple loop.
    Returns an empty list for inputs shorter than 2 characters.
    """
    n = len(input)
    # Slicing replaces the original character-by-character string
    # concatenation, which rebuilt each substring one char at a time.
    return [input[i:i + length]
            for length in range(2, n + 1)
            for i in range(n - length + 1)]


def pattern(input):
    """Build a pun from *input* by swapping one substring for a WordNet relative.

    For each substring (length >= 2) of *input*, follow the chain
    second synset -> first hypernym -> first hyponym -> first hyponym
    -> first lemma, and replace the first substring for which the whole
    chain exists with that lemma, upper-cased.

    Returns the modified string suffixed with '??😁🤣', or None when no
    substitution was possible.
    """
    response = input
    for substr in generate_list(input):
        try:
            syn = wn.synsets(substr)[1].hypernyms()[0].hyponyms()[
                0].hyponyms()[0].lemmas()[0].name()
        except (IndexError, LookupError):
            # Some level of the chain is empty for this substring.  The
            # original bare `except:` also hid genuine bugs (typos,
            # KeyboardInterrupt, missing corpora) — narrowed here.
            continue
        # The original also tested `syn != None`, but lemma.name()
        # always returns a str, so that branch could never be skipped.
        response = response.replace(substr, syn.upper())
        break

    if input == response:
        return None
    return response + '??😁🤣'

# Shared WordNet lemmatizer used by the normalisation helpers below.
lemmer = nltk.stem.WordNetLemmatizer()


def LemTokens(tokens):
    """Lemmatize every token in *tokens*, returning a new list."""
    return list(map(lemmer.lemmatize, tokens))

# Translation table mapping every punctuation code point to None (delete).
remove_punct_dict = {ord(ch): None for ch in string.punctuation}


def LemNormalize(text):
    """Lowercase *text*, strip punctuation, word-tokenize, and lemmatize."""
    cleaned = text.lower().translate(remove_punct_dict)
    return LemTokens(nltk.word_tokenize(cleaned))

def NLTK(input):
    """Answer *input* by TF-IDF retrieval over the local corpus.

    Reads corpus.txt, sentence-tokenizes it, appends the query, and
    delegates to bot(), which returns the corpus sentence most similar
    to the last element (the query).
    """
    # Context manager fixes the original resource leak: the file handle
    # was opened but never closed.
    with open('corpus.txt', errors='strict') as f:
        data = f.read()
    data = data.lower()
    # Models required by sent_tokenize and the WordNet lemmatizer.
    nltk.download('punkt')
    nltk.download('wordnet')
    sent_tokens = nltk.sent_tokenize(data)
    # Bug fix: the original never used `input` at all, yet bot() treats
    # the *last* sentence as the user's query — it must be appended.
    sent_tokens.append(input)
    return bot(sent_tokens)

def bot(sent_tokens):
    """Return the sentence from *sent_tokens* closest to the last one.

    The final element is treated as the user's query.  The best-matching
    earlier sentence is returned; when the top similarity is zero, a
    fallback contact message is returned instead.
    """
    robo1_response = ''
    vectorizer = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(sent_tokens)
    # Similarity of the query (last row) against every sentence.
    scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix)
    # Second-highest score: the highest is the query matched to itself.
    best_idx = scores.argsort()[0][-2]
    sorted_scores = scores.flatten()
    sorted_scores.sort()
    best_score = sorted_scores[-2]
    if best_score == 0:
        robo1_response = robo1_response+"I could not answer this right now but you can contact the head of our dept (PUSPHA RAJ)." # add the dept recommendation engine and contact details
        return robo1_response
    return robo1_response + sent_tokens[best_idx]

def generator(input=None):
    """Build the chatbot's list of reply strings for *input*.

    With input: collect a corpus answer (NLTK), a WordNet pun (pattern),
    and the most similar joke (get_best), skipping any empty result.
    Without input: greet via the corpus plus a random joke.
    """
    response = []
    if input:
        out1 = NLTK(input)
        if out1:
            # Bug fix: the original appended the undefined name `out`
            # here, raising NameError whenever NLTK() returned text.
            response.append(out1)

        out2 = pattern(input)
        if out2:
            response.append(out2)

        out3 = get_best(input)
        if out3:
            response.append(out3)

    else:
        out1 = NLTK("Hi, what's the matter")
        if out1:
            # Bug fix: NLTK() returns a single string; the original
            # iterated it and appended every *character* separately.
            response.append(out1)

        out2 = pyjokes.get_joke(language='en', category='all')
        if out2:
            response.append(out2)

    return response

# Expose the chatbot through a simple Gradio text-in/text-out UI.
# NOTE(review): generator() returns a *list* while outputs="text" expects a
# single string — confirm whether the responses should be joined first.
iface = gr.Interface(fn=generator, inputs="text", outputs="text")
iface.launch()