# Residue from the hosting page (Spaces status: "Runtime error"); not part of the program.
from haystack.document_stores.memory import InMemoryDocumentStore | |
from haystack.nodes import TfidfRetriever, FARMReader | |
import pickle | |
pickle_file = 'knowledge_graph.pickle'

# Load the knowledge graph from the pickle file.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load a knowledge_graph.pickle that was produced by a trusted source.
with open(pickle_file, 'rb') as f:
    knowledge_graph = pickle.load(f)

document_store = InMemoryDocumentStore()
node_sentences = {}
documents = []

# Index every edge once. Each edge is read as (source, target) via its first
# two items. Looking neighbours up in these maps (instead of re-scanning only
# the edges that touch the current node) is what makes grandparents and
# grandchildren reachable: an edge grandparent -> parent never touches the
# node itself.
parents_of = {}
children_of = {}
for edge in knowledge_graph.edges():
    source, target = edge[0], edge[1]
    children_of.setdefault(source, []).append(target)
    parents_of.setdefault(target, []).append(source)

nodes = [node for node in knowledge_graph.nodes() if node is not None]
for node in nodes:
    # Get the parents and grandparents of the current node
    parents = parents_of.get(node, [])
    grandparents = []
    for parent in parents:
        grandparents.extend(parents_of.get(parent, []))

    # Get the children and grandchildren of the current node
    children = children_of.get(node, [])
    grandchildren = []
    for child in children:
        grandchildren.extend(children_of.get(child, []))

    # Create the sentence by combining all the related nodes. None neighbours
    # are skipped and every part is passed through str() so that join() cannot
    # fail on a non-string node.
    sentence_parts = grandparents + parents + [node] + children + grandchildren
    sentence = ' '.join(str(part) for part in sentence_parts if part is not None)

    # Store the sentence for the current node
    node_sentences[node] = sentence

    # Create the document with the node and the sentence as the content
    documents.append({'text': node, 'content': sentence})

document_store.write_documents(documents)
# Question-answering pipeline: the retriever is fed by the query and the
# reader is fed by the retriever's output.
from haystack.pipelines import Pipeline

# Checkpoint handed to the reader.
model_name = "primasr/multilingualbert-for-eqa-finetuned"

# Retriever over the in-memory document store.
retriever = TfidfRetriever(document_store=document_store)

# Reader, configured not to use a GPU.
reader = FARMReader(model_name_or_path=model_name, use_gpu=False)

# Register the two components in execution order.
pipeline = Pipeline()
for _component, _name, _inputs in (
    (retriever, "Retriever", ["Query"]),
    (reader, "Reader", ["Retriever"]),
):
    pipeline.add_node(component=_component, name=_name, inputs=_inputs)
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# Translation models, loaded as (tokenizer, model) pairs.
# NOTE(review): the app targets Malay, but both checkpoint names carry the
# language code "id"; presumably Indonesian is used as a stand-in - confirm.

# Direction used for incoming English queries (en -> id).
en_id_tokenizer, en_id_model = (
    AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-id"),
    AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-id"),
)

# Direction used for answers returned to English speakers (id -> en).
id_en_tokenizer, id_en_model = (
    AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-id-en"),
    AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-id-en"),
)
# Canned chatbot replies. Each row holds, in order:
#   [0] the user phrase that asks for a different answer,
#   [1] the prefix placed in front of the next candidate answer,
#   [2] the reply used when no further candidate answer is available.
_ENGLISH_REPLIES = [
    "your answer is wrong",
    "Sorry for providing wrong answer, here is the newest answer:\n\n",
    "I am sorry that I can't actually answer your question =(",
]
_MALAY_REPLIES = [
    "jawapan anda adalah salah",
    "Maaf sedangkan memberi jawapan yang salah. Berikut adalah jawapan yang baru:\n\n",
    "Minta Maaf, saya tidak boleh menemukan soalan anda =(",
]

# Row 0 is English, row 1 is Malay.
pairs = [_ENGLISH_REPLIES, _MALAY_REPLIES]
# Function for checking whether the user asks for another answer to the same question
def checkReiterateQuery(query, lang, reply_pairs=None):
    """Tell whether *query* is a "your answer is wrong" style follow-up.

    The comparison ignores letter case and surrounding whitespace, so
    "Your answer is wrong " is recognised as well as the exact phrase.

    Args:
        query: The raw user message. ``None`` is treated as an empty message.
        lang: Language code of the message; ``'en'`` selects the English row
            of the reply table, anything else selects the second row.
        reply_pairs: Optional reply table whose rows start with the trigger
            phrase. Defaults to the module-level ``pairs``.

    Returns:
        ``(True, row)`` when the message is a trigger phrase, where ``row`` is
        0 for English and 1 otherwise; ``(False, 3)`` for any other message
        (3 is the sentinel value callers already receive for "no row").
    """
    table = pairs if reply_pairs is None else reply_pairs
    normalized = (query or "").strip().lower()
    # Only the first two rows are trigger rows, matching the two row indices
    # this function can return.
    triggers = {row[0].strip().lower() for row in table[:2]}
    if normalized not in triggers:
        return False, 3
    return True, (0 if lang == 'en' else 1)
import gradio as gr | |
from langdetect import detect | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# Conversation state used by chatbot_interface.
# NOTE(review): these are module-level globals, so every caller of the running
# app presumably shares one history and one "last result" - confirm intended.
chat_history = []
answer_counter = 0
# Output of the most recent pipeline.run(); stays None until a real question
# has been asked, so a follow-up sent as the very first message is handled.
result = None

# Reply returned for an empty message (nothing to detect or search for).
_EMPTY_MESSAGE_REPLY = "Please type a question. / Sila taip soalan anda."


def _detect_language(text):
    """Return the language code detected for *text*, or "ms" if detection fails."""
    # Imported here so the helper is self-contained; langdetect is already a
    # dependency of this file.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        return detect(text)
    except LangDetectException:
        # Raised for text without usable features (e.g. digits only). Any
        # code other than "en" makes the caller skip translation.
        return "ms"


def _translate(text, tokenizer, model):
    """Translate *text* with the given seq2seq tokenizer/model pair."""
    batch = tokenizer([text], return_tensors="pt", padding="longest", truncation=True)
    generated = model.generate(**batch)
    return tokenizer.batch_decode(generated, skip_special_tokens=True)[0]


def _answer_text(candidate):
    """Join a candidate's answer and context with ", ", skipping empty parts."""
    return ", ".join(part for part in (candidate.answer, candidate.context) if part)


def chatbot_interface(message):
    """Answer one user message and return ``(response, chat_history_text)``.

    A normal message is searched through the QA pipeline (English messages are
    translated first, and the answer is translated back). A follow-up message
    recognised by ``checkReiterateQuery`` does not search again: it steps to
    the next candidate answer of the previous search, or returns the canned
    "cannot answer" reply once the candidates are used up.

    Args:
        message: The text typed by the user.

    Returns:
        A tuple of the response for this message and the whole chat history
        joined with blank lines.
    """
    global answer_counter
    global result

    if not message or not message.strip():
        # Nothing to detect or search; leave history and counters untouched.
        return _EMPTY_MESSAGE_REPLY, "\n\n".join(chat_history)

    # Append the current message to the chat history
    chat_history.append(message)

    lang = _detect_language(message)
    reiterate, j = checkReiterateQuery(message, lang)
    # Row of `pairs` used for canned replies: the row reported for a
    # follow-up, otherwise chosen from the detected language.
    reply_row = j if reiterate else (0 if lang == "en" else 1)

    prefix = ""
    candidate = None
    if reiterate:
        # The user wants another answer for the same question.
        answer_counter = answer_counter + 1
        answers = result["answers"] if result else []
        # Never index past what the reader actually returned (at most 5).
        if answer_counter < min(5, len(answers)):
            prefix = pairs[reply_row][1]
            candidate = answers[answer_counter]
    else:
        answer_counter = 0
        query = message
        # If the language is English, translate the query before searching.
        if lang == "en":
            query = _translate(message, en_id_tokenizer, en_id_model)
        result = pipeline.run(query=query.lower(), params={
            "Retriever": {"top_k": 5},
            "Reader": {"top_k": 5}})
        if result["answers"]:
            candidate = result["answers"][answer_counter]

    body = _answer_text(candidate) if candidate is not None else ""
    if body:
        # Translate only the retrieved text back to English; the canned
        # prefix is already in the user's language.
        if lang == "en":
            body = _translate(body.lower(), id_en_tokenizer, id_en_model)
        response = prefix + body
    else:
        # No (further) usable answer: reply with the canned apology as-is.
        response = pairs[reply_row][2]

    # Append the response to the chat history
    chat_history.append(response)

    # Join the chat history with newline characters
    chat_history_text = "\n\n".join(chat_history)
    return response, chat_history_text
# Create a Gradio interface: one question box in, the answer and the running
# chat history out.
# NOTE(review): gr.inputs / gr.outputs and a boolean allow_flagging belong to
# the older Gradio API; kept unchanged here because the installed Gradio
# version is not visible from this file - confirm before upgrading.
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.inputs.Textbox(label="Please Type Your Question Here: "),
    outputs=[gr.outputs.Textbox(label="Answers"), gr.outputs.Textbox(label="Chat History")],
    description="## Question Answering system\n\nIt supports **English** and **Bahasa Malaysia**.",
    allow_flagging=False,
)

# Demo for the chatbot. The interface object is `iface`; the previous call
# used the undefined name `io`, which fails with a NameError at start-up.
iface.launch(inline=False)