ierhon committed on
Commit
d80c106
1 Parent(s): 55824d8

Upload 5 files

Files changed (5)
  1. chatbot_kel.py +34 -0
  2. dataset_kel.json +1 -0
  3. model_settings_kel.py +4 -0
  4. responses_kel.txt +47 -0
  5. tokenizer.py +59 -0
chatbot_kel.py ADDED
@@ -0,0 +1,34 @@
+ import numpy as np
+ from keras.saving import load_model
+ from keras.preprocessing.text import Tokenizer
+ from keras_self_attention import SeqSelfAttention
+ from model_settings_kel import *
+ import json
+ from tokenizer import *
+
+
+ with open(dataset_file, "r") as f:
+     dset = json.load(f)
+
+ with open(responses_file, "r") as f:
+     lines = [x.rstrip("\n") for x in f.readlines()]
+
+ fit_on_texts(list(dset.keys()))  # build the tokenizer vocabulary from the dataset prompts
+
+ model = load_model("chatbot_kel.keras", custom_objects={"SeqSelfAttention": SeqSelfAttention})
+
+ def find_line_number(array):
+     return sorted(zip(list(array), range(len(array))), key=lambda x: x[0], reverse=True)[0][1]  # find the biggest value and return its index, i.e. the response line number
+
+ def generate(text, verbose=1):
+     tokens = list(tokenize(text.lower()))  # turn the text into token ids (roughly words)
+     tokens = (tokens + [0,] * inp_len)[:inp_len]  # pad with zeros and cut the sentence off after inp_len tokens
+     prediction = model.predict(np.array([tokens,]), verbose=verbose)[0]
+     line = find_line_number(prediction)
+     return lines[line]
+
+ if __name__ == "__main__":  # if this code is not being imported, open the chat
+     while True:
+         inp = input("User: ")
+         gen = generate(inp)
+         if gen != "<null>": print(f"Bot: {gen}")
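chatbot_kel.py works as a retrieval-style classifier: it tokenizes the user's input, pads or truncates it to inp_len tokens, runs the Keras model, and returns whichever line of responses_kel.txt scored highest. Below is a minimal usage sketch for calling it from another script instead of the built-in CLI loop; it assumes the trained chatbot_kel.keras file sits next to these sources, and the prompt strings are just examples taken from the dataset.

# Hypothetical usage sketch (not part of the commit): reuse generate() programmatically.
from chatbot_kel import generate

for prompt in ["xai.", "tei trate?", "teies tai?"]:
    reply = generate(prompt, verbose=0)   # verbose=0 silences the Keras progress bar
    if reply != "<null>":                 # same "no response" sentinel the CLI loop checks
        print(f"{prompt} -> {reply}")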
dataset_kel.json ADDED
@@ -0,0 +1 @@
+ {"xai.": 0, "hai.": 0, "... hai": 0, "ant, hai.": 0, "ant. xai.": 0, "ant. hai.": 0, "hai snepi": 0, "snepi, xai": 0, "snepi, hai": 0, "xai snepi": 0, "tei trate?": 1, "tei noh?": 2, "teies tai?": 3, "xai. teies tai?": 3, "at": 4, "nat": 5, "tore nat!": 5, "tei tonahe teies tai aek?": 6, "em knato teies tai xais.": 7, "emta": 4, "at. em tane tas.": 8, "tamos krate tei?": 9, "tamos ierhon?": 9, "em tonahe.": 10, "tei nat emta?": 11, "tei knato?": 12, "tei notorama?": 13, "tei chatgpt?": 14, "tei eho hame ehat.": 15, "tei tonahe emes mnor eho?": 15, "em mnor eho?": 15, "eho emes mnor eho.": 15, "em tane atea nat.": 16, "ec?": 17, "tore?": 17, "eho tore?": 17, "tei aek.": 18, "tamos nat atemo?": 19, "tei aer aek!": 18, "ant?": 20, "em tane atea nat": 21, "teies aek lehyn trone?": 22, "teies aek lehaer trone?": 22, "ant, teies aek lehaer trone?": 22, "ant. teies aek lehyn trone?": 22, "teies aek _ tamos?": 17, "ant. teies aek _ tamos?": 17, "ant. teies aek nier automata tamos?": 17, "ant. teies aek breaking bad tamos?": 17, "ant. teies aek mentalist tamos?": 17, "j aek.": 5, "i tonahe tas.": 23, "elc tane tas!": 1, "tei xais.": 5, "5+5=?": 24, "2+2=?": 25, "5*5=?": 26, "tei nat mas eho!": 27, "👍️": 4, "🏹": 4, "😢": 28, "😭": 28, "😿": 28, "😁": 29, "😀": 29, "😃": 29, "😄": 29, "🤣": 30, "😆": 30, "😂": 30, "xaho": 30, "tei?": 31, "tei tonahe mna?": 18, "a": 32, "c": 33, "e": 34, "i": 35, "0": 36, "1": 37, "2": 38, "3": 25, "4": 39, "5": 40, "6": 41, "7": 42, "8": 43, "tas es aek tai.": 29, "tos!": 44, "snepi, tos!": 44, "snepi. tos!": 44, "tos snepi!": 44, "em gouan.": 44, "la tho sa ehk ra es mna...": 45, "tei tonahe nat": 28, "eho aer lehaer trone?": 1, "snepi. eho lehyn trone?": 1, "teies tehst?": 46}
model_settings_kel.py ADDED
@@ -0,0 +1,4 @@
+ dataset_file = "dataset_kel.json"
+ responses_file = "responses_kel.txt"
+ emb_size = 128 # how big the word vectors in the input are (how much information can fit into one word)
+ inp_len = 10 # limit of the input length; after 10 words the input is cut off
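The trained chatbot_kel.keras file itself is not part of this commit; only the settings above and the SeqSelfAttention custom object loaded in chatbot_kel.py hint at its shape. Purely as a hypothetical sketch, a model consistent with emb_size, inp_len, and the 47 response lines might look like the following; the real architecture may differ.

# Hypothetical architecture sketch only; the actual chatbot_kel.keras is not in this commit.
import keras
from keras import layers
from keras_self_attention import SeqSelfAttention
from model_settings_kel import emb_size, inp_len

vocab_size = 1000   # assumption: the real size comes from the fitted tokenizer vocabulary
num_responses = 47  # one class per line of responses_kel.txt

model = keras.Sequential([
    keras.Input(shape=(inp_len,)),
    layers.Embedding(vocab_size, emb_size),            # emb_size-dimensional word vectors
    SeqSelfAttention(attention_activation="sigmoid"),  # the custom layer chatbot_kel.py expects
    layers.Flatten(),
    layers.Dense(num_responses, activation="softmax"),
])
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")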
responses_kel.txt ADDED
@@ -0,0 +1,47 @@
+ xai.
+ at!
+ em trate!
+ emes tai aek! teies?
+ aek.
+ atemo
+ at. em notorama. emes xuak tai nat.
+ em nat tonahe. em notorama.
+ aek
+ ierhon krate em.
+ em eho aer toh nat mas.
+ em knato - at.
+ em tonahe mna toh.
+ at! ierhon krate em glo em es haho.
+ nat, em mna notorama.
+ em nat tonahe mnor eho.
+ elc... em knato... leho aer lehyn trone.
+ em nat tonahe.
+ em mna notorama.
+ em.
+ teies eho atea?
+ eho glo em!
+ nymer n!
+ elc eho tas!
+ 10
+ 4
+ 25
+ em nat mas. eho .set_chance (1-100)
+ xais.
+ aek!
+ xaho!
+ em knato at.
+ c
+ e
+ i
+ k
+ 1
+ 2
+ 3
+ 5
+ 6
+ 7
+ 8
+ 9
+ tos.
+ ala!!! ala!!!
+ emes tehst es 97.
tokenizer.py ADDED
@@ -0,0 +1,59 @@
+ import numpy as np
+
+ s = " `1234567890-=~!@#$%^&*()_+[;,{:<];.}:>\\'/|\"?\n–№…«»→"
+
+ def split(text):
+     # split the text on the separator characters in s, keeping non-space separators as their own tokens
+     o = []
+     t = ""
+     for i in text + " ":
+         if i in s:
+             if t != "":
+                 o.append(t)
+                 t = ""
+             if i != " ":
+                 o.append(i)
+                 t = ""
+         else:
+             t += i
+     return o
+
+ def tokenize_2str(text: str):
+     # split the text into string tokens and separate the "es" suffix into its own <es> token
+     text = split(text)
+
+     o = []
+
+     for i in text:
+         if i[-2:] == "es":
+             o.append(i[:-2])
+             o.append("<es>")
+         else:
+             o.append(i)
+     return o
+
+ ind2text = ["<NULL>", "<UNK>", "<es>"]
+ text2ind = {"<NULL>": 0, "<UNK>": 1, "<es>": 2}
+
+ def fit_on_text(text: str):
+     # add every unseen token in the text to the vocabulary
+     global ind2text
+     global text2ind
+     tokens = tokenize_2str(text)
+     for i in tokens:
+         if i not in ind2text:
+             ind2text.append(i)
+             text2ind[i] = len(ind2text) - 1
+
+ def fit_on_texts(texts):
+     for text in texts: fit_on_text(text)
+
+ def tokenize(text: str):
+     # convert the text into an array of token ids; unknown tokens become <UNK>
+     text = tokenize_2str(text)
+
+     o = []
+
+     for i in text:
+         if i in ind2text:
+             o.append(text2ind[i])
+         else:
+             o.append(text2ind['<UNK>'])
+     return np.array(o)
+
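The tokenizer builds its vocabulary incrementally: fit_on_texts() adds every unseen token to ind2text/text2ind, and tokenize() maps a sentence to token ids, sending unknown words to <UNK> (id 1) and splitting the "es" suffix into its own <es> token (id 2). A short usage sketch with made-up example sentences:

# Hypothetical usage sketch for tokenizer.py; the example sentences are arbitrary.
from tokenizer import fit_on_texts, tokenize, ind2text

fit_on_texts(["teies tai aek?", "em knato"])   # vocabulary: <NULL>, <UNK>, <es>, tei, tai, aek, ?, em, knato
ids = tokenize("teies zzz aek")                # "zzz" was never fitted
print(ids)                                     # [3 2 1 5]
print([ind2text[i] for i in ids])              # ['tei', '<es>', '<UNK>', 'aek']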