from haystack.document_stores.memory import InMemoryDocumentStore
from haystack.nodes import TfidfRetriever, FARMReader
import pickle

pickle_file = 'knowledge_graph.pickle'

# Load the knowledge graph from the pickle file
with open(pickle_file, 'rb') as f:
    knowledge_graph = pickle.load(f)

document_store = InMemoryDocumentStore()

node_sentences = {}
documents = []
nodes = [node for node in knowledge_graph.nodes() if node is not None]

for node in nodes:
    # Get all the edges incident on the current node
    related_edges = [edge for edge in knowledge_graph.edges() if edge[0] == node or edge[1] == node]

    # Get the parents and grandparents of the current node
    # (grandparent edges are incident on the parent, not the node, so scan all edges)
    parents = [edge[0] for edge in related_edges if edge[1] == node]
    grandparents = []
    for parent in parents:
        grandparents.extend([edge[0] for edge in knowledge_graph.edges() if edge[1] == parent])

    # Get the children and grandchildren of the current node
    children = [edge[1] for edge in related_edges if edge[0] == node]
    grandchildren = []
    for child in children:
        grandchildren.extend([edge[1] for edge in knowledge_graph.edges() if edge[0] == child])

    # Create the sentence by combining all the related nodes
    sentence_parts = grandparents + parents + [node] + children + grandchildren
    sentence = ' '.join(sentence_parts)

    # Store the sentence for the current node
    node_sentences[node] = sentence

    # Create the document with the node as the text and the sentence as the content
    documents.append({'text': node, 'content': sentence})

document_store.write_documents(documents)

# Initialize the retriever
retriever = TfidfRetriever(document_store=document_store)

# Initialize the reader
model_name = "primasr/multilingualbert-for-eqa-finetuned"
reader = FARMReader(model_name_or_path=model_name, use_gpu=False)

# Create a pipeline from the retriever and reader components
from haystack.pipelines import Pipeline

pipeline = Pipeline()
pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Translate English queries to Malay (the English->Indonesian model is used
# as a close proxy for Malay)
en_id_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-id")
en_id_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-id")

# Translate the Malay answer back to English
id_en_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-id-en")
id_en_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-id-en")

# Define response pairs for the chatbot:
# [trigger phrase, retry prefix, fallback when no answers remain]
pairs = [
    [
        "your answer is wrong",
        "Sorry for providing a wrong answer, here is the newest answer:\n\n",
        "I am sorry that I can't actually answer your question =("
    ],
    [
        "jawapan anda adalah salah",
        "Maaf kerana memberi jawapan yang salah. Berikut adalah jawapan yang baru:\n\n",
        "Minta maaf, saya tidak dapat menjawab soalan anda =("
    ]
]
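# --- Optional sanity check: an illustrative sketch, not part of the original flow ---
# Round-trips a short sentence through both MarianMT models to confirm they load
# and generate as expected. The helper name and the sample sentence below are
# assumptions chosen purely for demonstration.
def roundtrip_check(text):
    batch = en_id_tokenizer([text], return_tensors='pt')
    malay = en_id_tokenizer.batch_decode(en_id_model.generate(**batch), skip_special_tokens=True)[0]
    batch = id_en_tokenizer([malay], return_tensors='pt')
    english = id_en_tokenizer.batch_decode(id_en_model.generate(**batch), skip_special_tokens=True)[0]
    return malay, english

# Example usage (uncomment to run):
# print(roundtrip_check("Where is the nearest hospital?"))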
# Check whether the user is asking to re-answer the same question
def checkReiterateQuery(query, lang):
    if query in [pairs[0][0], pairs[1][0]]:
        j = 0 if lang == 'en' else 1
        return True, j
    else:
        return False, 3

import gradio as gr
from langdetect import detect

chat_history = []
answer_counter = 0

def chatbot_interface(message):
    global answer_counter
    # result is kept global so a follow-up "wrong answer" message can reuse
    # the answers retrieved for the previous question
    global result

    # Append the current message to the chat history
    chat_history.append(message)

    lang = detect(message)
    reiterate, j = checkReiterateQuery(message, lang)

    # If the user wants another answer for the same question
    if reiterate:
        answer_counter = answer_counter + 1
        if answer_counter < 5:
            retrieved_main_answer = pairs[j][1] + result['answers'][answer_counter].answer
            retrieved_main_context = result['answers'][answer_counter].context
        else:
            retrieved_main_answer = pairs[j][2]
            retrieved_main_context = ""
    else:
        answer_counter = 0
        # If the question is in English, translate it to Malay first
        if lang == "en":
            tokenized_text = en_id_tokenizer([message], return_tensors='pt')
            translation = en_id_model.generate(**tokenized_text)
            message = en_id_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]
        result = pipeline.run(query=message.lower(),
                              params={"Retriever": {"top_k": 5}, "Reader": {"top_k": 5}})
        retrieved_main_answer = result['answers'][answer_counter].answer
        retrieved_main_context = result['answers'][answer_counter].context

    response = retrieved_main_answer + ", " + retrieved_main_context

    # Translate the response back to English if the question was asked in English
    if lang == "en":
        tokenized_text = id_en_tokenizer([response.lower()], return_tensors='pt')
        translation = id_en_model.generate(**tokenized_text)
        response = id_en_tokenizer.batch_decode(translation, skip_special_tokens=True)[0]

    # Append the response to the chat history
    chat_history.append(response)

    # Join the chat history with blank lines between turns
    chat_history_text = "\n\n".join(chat_history)

    return response, chat_history_text

# Create a Gradio interface
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.Textbox(label="Please Type Your Question Here: "),
    outputs=[gr.Textbox(label="Answers"), gr.Textbox(label="Chat History")],
    description="## Question Answering system\n\nIt supports **English** and **Bahasa Malaysia**.",
    allow_flagging="never"
)

# Launch the chatbot demo
iface.launch(inline=False)
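# --- Direct pipeline query: an illustrative sketch; the sample Malay query is an
# assumption for demonstration only ---
# Bypasses the Gradio UI to exercise the retriever and reader in isolation;
# uncomment and run before iface.launch(), since launch() blocks the script.
# sample_result = pipeline.run(query="siapakah perdana menteri malaysia",
#                              params={"Retriever": {"top_k": 5}, "Reader": {"top_k": 5}})
# print(sample_result['answers'][0].answer, "-", sample_result['answers'][0].context)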