Spaces:

LTW-1181203031
/

Group13_NLPProject

Runtime error

App Files Files Community

Group13_NLPProject / app.py

LTW-1181203031

Update app.py

fdc8d43 over 1 year ago

raw

history blame

5.77 kB

	from haystack.document_stores.memory import InMemoryDocumentStore
	from haystack.nodes import TfidfRetriever, FARMReader

	import pickle

	pickle_file = 'knowledge_graph.pickle'

	# Load the knowledge graph from the pickle file.
	# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling,
	# so this file must come from a trusted source (it is expected to ship
	# alongside the app).
	with open(pickle_file, 'rb') as f:
	    knowledge_graph = pickle.load(f)

	document_store = InMemoryDocumentStore()
	node_sentences = {}
	documents = []
	nodes = [node for node in knowledge_graph.nodes() if node is not None]

	# Index every edge once as (source -> targets) and (target -> sources).
	# The previous version searched only the edges touching the current node
	# when looking for grandparents/grandchildren, so those lookups could
	# only ever match cycles or self-loops; it also rescanned the full edge
	# list for every node.
	parents_of = {}
	children_of = {}
	for edge in knowledge_graph.edges():
	    source, target = edge[0], edge[1]
	    # Edges with a missing endpoint cannot contribute to a sentence
	    # (' '.join would fail on None), mirroring the None filter on nodes.
	    if source is None or target is None:
	        continue
	    children_of.setdefault(source, []).append(target)
	    parents_of.setdefault(target, []).append(source)

	for node in nodes:
	    # Parents and grandparents of the current node (incoming direction)
	    parents = parents_of.get(node, [])
	    grandparents = []
	    for parent in parents:
	        grandparents.extend(parents_of.get(parent, []))

	    # Children and grandchildren of the current node (outgoing direction)
	    children = children_of.get(node, [])
	    grandchildren = []
	    for child in children:
	        grandchildren.extend(children_of.get(child, []))

	    # Create the sentence by combining all the related nodes, ordered from
	    # the most distant ancestors to the most distant descendants
	    sentence_parts = grandparents + parents + [node] + children + grandchildren
	    sentence = ' '.join(sentence_parts)

	    # Store the sentence for the current node
	    node_sentences[node] = sentence

	    # Create the document with the node and the sentence as the content
	    documents.append({'text': node, 'content': sentence})

	# Write all documents in a single call once the list is complete
	document_store.write_documents(documents)

	from haystack.pipelines import Pipeline

	# Question-answering pipeline: Query -> Retriever -> Reader
	pipeline = Pipeline()

	# Retriever stage: TF-IDF retrieval over the documents in document_store
	retriever = TfidfRetriever(document_store=document_store)
	pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])

	# Reader stage: FARMReader loaded from the checkpoint below, GPU disabled
	model_name = "primasr/multilingualbert-for-eqa-finetuned"
	reader = FARMReader(model_name_or_path=model_name, use_gpu=False)
	pipeline.add_node(component=reader, name="Reader", inputs=["Retriever"])

	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

	# Checkpoints used for translating between English and the language of
	# the knowledge base.
	# NOTE(review): the checkpoint names carry the code "id" while the rest of
	# this file describes the target language as Malay -- presumably these
	# models are used as a stand-in for Malay; confirm.
	EN_TO_ID_CHECKPOINT = "Helsinki-NLP/opus-mt-en-id"
	ID_TO_EN_CHECKPOINT = "Helsinki-NLP/opus-mt-id-en"

	# English query -> knowledge-base language (tokenizer first, then model)
	en_id_tokenizer = AutoTokenizer.from_pretrained(EN_TO_ID_CHECKPOINT)
	en_id_model = AutoModelForSeq2SeqLM.from_pretrained(EN_TO_ID_CHECKPOINT)

	# Knowledge-base language answer -> English (tokenizer first, then model)
	id_en_tokenizer = AutoTokenizer.from_pretrained(ID_TO_EN_CHECKPOINT)
	id_en_model = AutoModelForSeq2SeqLM.from_pretrained(ID_TO_EN_CHECKPOINT)

	# Canned chatbot phrases, one row per language (row 0 is used when the
	# detected language is 'en', row 1 otherwise).
	# Each row is [trigger phrase, prefix for a retried answer, apology].
	pairs = [
	    [
	        "your answer is wrong",
	        "Sorry for providing wrong answer, here is the newest answer:\n\n",
	        "I am sorry that I can't actually answer your question =("
	    ],
	    [
	        "jawapan anda adalah salah",
	        "Maaf sedangkan memberi jawapan yang salah. Berikut adalah jawapan yang baru:\n\n",
	        "Minta Maaf, saya tidak boleh menemukan soalan anda =("
	    ],
	]


	def checkReiterateQuery(query, lang):
	    """Tell whether the user is asking for another answer to the last question.

	    The query counts as a retry request when it equals one of the trigger
	    phrases in ``pairs`` (first element of each row). The comparison ignores
	    surrounding whitespace and letter case, so "Your answer is wrong " is
	    recognised as well as the exact lowercase phrase.

	    Args:
	        query: The raw user message.
	        lang: Detected language code of the message.

	    Returns:
	        A ``(is_retry, row)`` tuple. For a retry request ``row`` is the index
	        into ``pairs`` to answer with: 0 when ``lang`` is ``'en'``, else 1.
	        Otherwise the tuple is ``(False, 3)``; 3 is a sentinel that is not a
	        valid index into ``pairs``.
	    """
	    normalized = query.strip().casefold()
	    triggers = {row[0].casefold() for row in pairs}
	    if normalized not in triggers:
	        return False, 3
	    return True, (0 if lang == 'en' else 1)

	import gradio as gr
	from langdetect import detect
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

	# Conversation state shared by every call to chatbot_interface.
	chat_history = []     # alternating user messages and bot responses
	answer_counter = 0    # index of the answer last shown for the current question
	result = None         # output of the most recent pipeline run, if any

	# Number of candidates requested from both the retriever and the reader.
	_TOP_K = 5


	def _translate(text, tokenizer, model):
	    """Translate a single string with the given seq2seq tokenizer/model pair."""
	    # The tokenizer is called directly; prepare_seq2seq_batch is deprecated.
	    batch = tokenizer([text], return_tensors='pt', padding=True, truncation=True)
	    generated = model.generate(**batch)
	    return tokenizer.batch_decode(generated, skip_special_tokens=True)[0]


	def chatbot_interface(message):
	    """Answer one user message and return the reply plus the chat transcript.

	    A message matching a retry phrase (see ``checkReiterateQuery``) advances
	    to the next answer of the previous pipeline run. Any other message is
	    treated as a new question: English input is translated with the en->id
	    model, lower-cased and run through the QA pipeline.

	    When no further answer is available (no previous question, or the
	    answers of the last run are exhausted) the canned apology is returned
	    instead of raising.

	    Args:
	        message: The text typed by the user.

	    Returns:
	        A ``(response, chat_history_text)`` tuple, where ``chat_history_text``
	        is every message and response so far joined by blank lines.
	    """
	    global answer_counter
	    global result

	    # Append the current message to the chat history
	    chat_history.append(message)
	    lang = detect(message)
	    is_english = lang == "en"
	    reiterate, j = checkReiterateQuery(message, lang)

	    prefix = ""
	    if reiterate:
	        # The user wants another answer to the same question
	        answer_counter = answer_counter + 1
	        prefix = pairs[j][1]
	    else:
	        answer_counter = 0
	        query = message
	        # English questions are translated before querying the pipeline
	        if is_english:
	            query = _translate(message, en_id_tokenizer, en_id_model)

	        result = pipeline.run(query=query.lower(), params={
	            "Retriever": {"top_k": _TOP_K},
	            "Reader": {"top_k": _TOP_K}})

	    # `result` is still None when a retry phrase arrives before any question
	    answers = result['answers'] if result else []

	    if answer_counter < min(_TOP_K, len(answers)):
	        picked = answers[answer_counter]
	        body = picked.answer + ", " + picked.context
	        # Only the retrieved text is translated back to English; the canned
	        # prefix is already in the user's language and is left untouched.
	        if is_english:
	            body = _translate(body.lower(), id_en_tokenizer, id_en_model)
	        response = prefix + body
	    else:
	        # Nothing (more) to offer: reply with the apology for this language
	        apology_row = j if reiterate else (0 if is_english else 1)
	        response = pairs[apology_row][2]

	    # Append the response to the chat history
	    chat_history.append(response)

	    # Join the chat history with newline characters
	    chat_history_text = "\n\n".join(chat_history)

	    return response, chat_history_text

	# Create a Gradio interface: one text input, two text outputs (the answer
	# and the running chat history returned by chatbot_interface).
	# NOTE(review): gr.inputs / gr.outputs and a boolean allow_flagging are the
	# older Gradio spellings -- confirm they match the installed Gradio version.
	iface = gr.Interface(
	    fn=chatbot_interface,
	    inputs=gr.inputs.Textbox(label="Please Type Your Question Here: "),
	    outputs=[gr.outputs.Textbox(label="Answers"), gr.outputs.Textbox(label="Chat History")],
	    description="## Question Answering system\n\nIt supports English and Bahasa Malaysia.",
	    allow_flagging=False,
	)

	# Demo for the chatbot. The interface object is named `iface`; the previous
	# call used the undefined name `io`, which raised NameError at startup.
	iface.launch(inline=False)