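# app.py - bilingual (English / Bahasa Malaysia) question-answering chatbot.
# Flow: load a pickled knowledge graph, flatten each node's neighbourhood
# into a sentence, index the sentences in Haystack, answer queries with a
# TF-IDF retriever + multilingual BERT reader, and translate queries and
# answers with OPUS-MT models.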
from haystack.document_stores.memory import InMemoryDocumentStore
from haystack.nodes import TfidfRetriever, FARMReader
import pickle
pickle_file = 'knowledge_graph.pickle'

# Load the knowledge graph from the pickle file
with open(pickle_file, 'rb') as f:
    knowledge_graph = pickle.load(f)
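# NOTE: the pickle is assumed to contain a networkx-style directed graph,
# i.e. an object exposing .nodes() and .edges() as (source, target) pairs.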
document_store = InMemoryDocumentStore()

node_sentences = {}
documents = []

# Materialize the edge list once; it is scanned repeatedly below
all_edges = list(knowledge_graph.edges())
nodes = [node for node in knowledge_graph.nodes() if node is not None]

for node in nodes:
    # Get all the edges that touch the current node
    related_edges = [edge for edge in all_edges if edge[0] == node or edge[1] == node]

    # Get the parents and grandparents of the current node.
    # The grandparent lookup must scan the full edge list: related_edges
    # only holds edges touching the current node, not the parents' parents.
    parents = [edge[0] for edge in related_edges if edge[1] == node]
    grandparents = []
    for parent in parents:
        grandparents.extend([edge[0] for edge in all_edges if edge[1] == parent])

    # Get the children and grandchildren of the current node (same caveat)
    children = [edge[1] for edge in related_edges if edge[0] == node]
    grandchildren = []
    for child in children:
        grandchildren.extend([edge[1] for edge in all_edges if edge[0] == child])

    # Create the sentence by combining all the related nodes
    sentence_parts = grandparents + parents + [node] + children + grandchildren
    sentence = ' '.join(sentence_parts)

    # Store the sentence for the current node
    node_sentences[node] = sentence

    # The sentence is the searchable content; the node label goes into the
    # document metadata (Haystack 1.x document schema)
    documents.append({'content': sentence, 'meta': {'node': node}})

document_store.write_documents(documents)
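# Hypothetical example of the flattening above: with edges ("animal", "dog")
# and ("dog", "puppy"), node "dog" is indexed with content "animal dog puppy",
# so a query mentioning any relative of "dog" can retrieve its document.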
# Initialize the retriever
retriever = TfidfRetriever(document_store=document_store)

# Initialize the reader
model_name = "primasr/multilingualbert-for-eqa-finetuned"
reader = FARMReader(model_name_or_path=model_name, use_gpu=False)

# Create the pipeline from the retriever and reader components
from haystack.pipelines import Pipeline

pipeline = Pipeline()
pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])
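# Quick sanity check (illustrative only; the query string is hypothetical):
#   sample = pipeline.run(query="apakah itu demam denggi",
#                         params={"Retriever": {"top_k": 5},
#                                 "Reader": {"top_k": 5}})
#   print(sample["answers"][0].answer, "|", sample["answers"][0].context)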
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Translate English queries to Malay. Note that the OPUS-MT checkpoints
# below are English<->Indonesian (id); they are used here as a stand-in
# for Malay, a closely related language.
# Initialize the tokenizer
en_id_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-id")
# Initialize the model
en_id_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-id")

# Translate the Malay answer back to English
# Initialize the tokenizer
id_en_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-id-en")
# Initialize the model
id_en_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-id-en")
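# Illustrative round-trip (the sample sentence is made up, not from the app):
#   batch = en_id_tokenizer(["Where is the hospital?"], return_tensors="pt")
#   malay = en_id_tokenizer.batch_decode(en_id_model.generate(**batch),
#                                        skip_special_tokens=True)[0]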
# Response pairs for the chatbot: [trigger phrase, retry prefix, give-up message]
pairs = [
    [
        "your answer is wrong",
        "Sorry for providing a wrong answer, here is the newest answer:\n\n",
        "I am sorry that I can't actually answer your question =("
    ],
    [
        "jawapan anda adalah salah",
        "Maaf kerana memberi jawapan yang salah. Berikut adalah jawapan yang baru:\n\n",
        "Minta maaf, saya tidak dapat menjawab soalan anda =("
    ]]
# Check whether the user is asking for another answer to the same question
def checkReiterateQuery(query, lang):
    if query in [pairs[0][0], pairs[1][0]]:
        # j selects the response language: 0 = English, 1 = Malay
        j = 0 if lang == 'en' else 1
        return True, j
    else:
        # 3 is a dummy index; it is never used when the first value is False
        return False, 3
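# Example: checkReiterateQuery("your answer is wrong", "en") returns (True, 0);
# any other message returns (False, 3) and is answered as a fresh query.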
import gradio as gr
from langdetect import detect
chat_history = []
answer_counter = 0
# Holds the latest pipeline result so that follow-up "wrong answer"
# messages can step through the alternative answers
result = None
def chatbot_interface(message):
    global answer_counter
    global result

    # Append the current message to the chat history
    chat_history.append(message)

    lang = detect(message)
    reiterate, j = checkReiterateQuery(message, lang)

    # If the user asks to re-iterate the answer for the same question
    if reiterate:
        answer_counter = answer_counter + 1
        # Guard against running past the answers actually returned
        if result is not None and answer_counter < len(result['answers']):
            retrieved_main_answer = pairs[j][1] + result['answers'][answer_counter].answer
            retrieved_main_context = result['answers'][answer_counter].context
        else:
            retrieved_main_answer = pairs[j][2]
            retrieved_main_context = ""
    else:
        answer_counter = 0
        # If the question is in English, translate it to Malay first
        if lang == "en":
            tokenized_text = en_id_tokenizer([message], return_tensors='pt')
            translation = en_id_model.generate(**tokenized_text)
            message = en_id_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]
        result = pipeline.run(query=message.lower(), params={
            "Retriever": {"top_k": 5},
            "Reader": {"top_k": 5}})
        retrieved_main_answer = result['answers'][answer_counter].answer
        retrieved_main_context = result['answers'][answer_counter].context

    response = retrieved_main_answer + ", " + retrieved_main_context

    # Translate the response back to English if the question was in English
    if lang == "en":
        tokenized_text = id_en_tokenizer([response.lower()], return_tensors='pt')
        translation = id_en_model.generate(**tokenized_text)
        response = id_en_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]

    # Append the response to the chat history
    chat_history.append(response)

    # Join the chat history with blank lines between turns
    chat_history_text = "\n\n".join(chat_history)
    return response, chat_history_text
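# Note: chat_history, answer_counter and result are module-level globals, so
# every user of a hosted instance shares a single conversation state.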
# Create a Gradio interface
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.Textbox(label="Please Type Your Question Here: "),
    outputs=[gr.Textbox(label="Answers"), gr.Textbox(label="Chat History")],
    description="## Question Answering system\n\nIt supports **English** and **Bahasa Malaysia**.",
    allow_flagging="never"
)
# Launch the chatbot demo
iface.launch(inline=False)