import streamlit as st
from transformers import pipeline
import re
import time
import requests

# ProphetNet model fine-tuned for SQuAD-style question generation, reached
# through the Hugging Face hosted Inference API.
API_URL = "https://api-inference.huggingface.co/models/microsoft/prophetnet-large-uncased-squad-qg"
# SECURITY NOTE(review): a live Hugging Face API token is hard-coded here (and
# again below).  It is leaked by this file; it should be revoked and loaded
# from an environment variable or st.secrets instead of being committed.
headers = {"Authorization": "Bearer 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"}


def query(payload):
    """POST `payload` to the question-generation endpoint and return its JSON.

    No HTTP status check is performed: on API failure the decoded JSON is an
    error object rather than a list of generations, and callers must tolerate
    that shape.
    """
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()


# -----------------------------------------------------------
# FLAN-T5-XXL endpoint used to answer the generated questions from evidence.
API_URL_evidence = "https://api-inference.huggingface.co/models/google/flan-t5-xxl"
headers_evidence = {"Authorization": "Bearer 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"}


def query_evidence(payload):
    """POST `payload` to the evidence question-answering endpoint, return JSON."""
    response = requests.post(API_URL_evidence, headers=headers_evidence, json=payload)
    return response.json()


# -----------------------------------------------------------
# Static page copy: title, usage instructions, and worked example 1.
st.title('Welcome to :blue[FACTIFY - 5WQA] ')
st.header('5W Aspect-based Fact Verification through Question Answering :blue[Web Demo]')
st.subheader('Here are a few steps to begin exploring and interacting with this demo.')
st.caption('First you need to input your claim.')
st.caption('Then you need to input your evidence and press **:green[Submit]**.')
st.caption('Upon completing these two steps, kindly wait for a minute to receive the results.')
st.caption('Start by inputting the following instance of a claim and corresponding evidence into the designated text fields.')
# -----------------------------------------------------------------------------------------------
st.caption('**Example 1**')
st.caption(''':green[Claim:] :point_right: Amazon announced on March 16 it would hire 100,000 new warehouse and delivery workers and raise wages $ 2 per hour through April in response to increased demand for its services because of the coronavirus pandemic .''')
st.caption(''':green[Evidence:] :point_right: Due to the consumers increasingly relying on online retailers, Amazon planned to hire over 99,000 workers in the warehouse and delivery sector during the Pandemic in the USA.''')
# NOTE(review): every code line below is byte-identical to the original source.
# The original file has had its newlines collapsed, so each physical line holds
# many statements and this section is not currently runnable Python.  The
# comment lines added between the physical lines annotate what each one
# contains; they change no code bytes.
#
# UI: example claims 2 and 3, the `proc` callback that echoes the claim text
# area back to the page, the claim text area, and the evidence submission form.
# NOTE(review): this physical line ends INSIDE a single-quoted st.caption(...)
# string literal that continues on the next physical line — a newline was lost
# inside the string when the file was mangled; the two halves must be rejoined.
#----------------------------------------------------------------------------------------------- st.caption('**Example 2**') st.caption(''':green[Claim:] :point_right: In China, Buddhist monks and nuns lived together in places such as the Yunnan monastery.''') st.caption(''':green[Evidence:] :point_right: Monastics in Japan are particularly exceptional in the Buddhist tradition because the monks and nuns can marry after receiving their higher ordination . ''') #----------------------------------------------------------------------------------------------- st.caption('**Example 3**') st.caption(''':green[Claim:] :point_right: In Batman, Penguin hydrates the henchmen with water contaminated with atomic waste.''') st.caption(''':green[Evidence:] :point_right: And Penguin even schemes his way into the Batcave along with five dehydrated henchmen ; this plan fails when the henchmen are unexpectedly killed when he mistakenly rehydrates them with heavy water contaminated with atomic waste , regularly used to recharge the Batcave s atomic pile . ''') #----------------------------------------------------------- def proc(): st.write(st.session_state.text_key) # st.text_area('enter text', on_change=proc, key='text_key') claim_text=st.text_area("Enter your claim:", on_change=proc, key='text_key') # form_claim = st.form(key='my_claim') # form_claim.text_input(label='Enter your claim') # claim_text = form_claim.form_submit_button(label='Submit') # evidence_text=st.text_area("Enter your evidence:") form_evidence = st.form(key='my_evidence') form_evidence.text_input(label='Enter your evidence') evidence_text = form_evidence.form_submit_button(label='Submit') if evidence_text: st.caption(':green[Kindly hold on for a few minutes while the QA pairs are being generated]') st.caption(':blue[At times, you may encounter null/none outputs, which could be a result of a delay in loading the models through the API. 
# Continuation of the split caption string, then deferred heavy imports
# (pandas, rouge_score, numpy, AllenNLP) and loading of the local SRL model
# archive "structured-prediction-srl-bert.tar.gz".
# `claim(text)` builds a DataFrame from the claim and extracts 5W spans
# (who/what/why/when/where) out of SRL verb descriptions held in a 'modified'
# dict column: for each tag it scans each description string for "<tag>: " and
# collects the characters up to the closing "]".
# NOTE(review): the middle of `claim` is CORRUPTED — the text jumps from the
# truncated regex `re.sub(r'(?` straight to `".join(who)`.  The rest of
# remove_special_chars, the SRL predictor call that populates df['modified'],
# and the start of the "who" extraction loop were lost in the mangling and
# must be recovered from version control; the surviving what/why/when/where
# loops below show the pattern the lost "who" loop followed.
# NOTE(review): writes of the form df['what'][j] = ... are pandas chained
# indexing (SettingWithCopyWarning); df.loc[j, 'what'] is the safe form.
If you experience this problem, kindly try again after a few minutes.]') import pandas as pd from rouge_score import rouge_scorer import numpy as np from allennlp.predictors.predictor import Predictor import allennlp_models.tagging predictor = Predictor.from_path("structured-prediction-srl-bert.tar.gz") #--------------------------------------------------------------- def claim(text): import re def remove_special_chars(text): # Remove special characters that are not in between numbers text = re.sub(r'(?".join(who) # else: # continue #----------FOR COLUMN "WHAT"------------# df['what'] = '' for j in range(len(df['modified'])): val_list = [] val_string = '' for k,v in df['modified'][j].items(): # print(type(v)) val_list.append(v) what = [] for indx in range(len(val_list)): val_string = val_list[indx] pos = val_string.find("what: ") substr = '' if pos != -1: for i in range(pos+6, len(val_string)): if val_string[i] == "]": break else: substr = substr + val_string[i] else: pass if len(substr)!= 0: what.append(substr) else: pass df['what'][j] = "".join(what) # else: # continue #----------FOR COLUMN "WHY"------------# df['why'] = '' for j in range(len(df['modified'])): val_list = [] val_string = '' for k,v in df['modified'][j].items(): # print(type(v)) val_list.append(v) why = [] for indx in range(len(val_list)): val_string = val_list[indx] pos = val_string.find("why: ") substr = '' if pos != -1: for i in range(pos+5, len(val_string)): if val_string[i] == "]": break else: substr = substr + val_string[i] else: pass if len(substr)!= 0: why.append(substr) else: pass df['why'][j] = "".join(why) # else: # continue #----------FOR COLUMN "WHEN"------------# df['when'] = '' for j in range(len(df['modified'])): val_list = [] val_string = '' for k,v in df['modified'][j].items(): # print(type(v)) val_list.append(v) when = [] for indx in range(len(val_list)): val_string = val_list[indx] pos = val_string.find("when: ") substr = '' if pos != -1: for i in range(pos+6, len(val_string)): 
# End of `claim` ("when"/"where" column loops; returns the claim + 5W frame).
# `split_ws` splits each item of a list on `delimiter` and strips the pieces.
# NOTE(review): the default delimiter "" would make str.split raise
# ValueError("empty separator"), and every gen_qa_* caller relies on the
# default — presumably a real delimiter string was lost in the mangling;
# confirm against history.
# `calc_rouge_l_score` returns the ROUGE-L F-measure between the two texts.
# `rephrase_question_who` forces a generated question to start with "Who".
# `gen_qa_who` begins here (it continues on the next physical line): for each
# row with a non-empty "who" span it generates a question from the claim via
# the ProphetNet API and answers it from the evidence via the FLAN-T5 API.
# NOTE(review): inside gen_qa_who the local `claim=df['claim'][i]` shadows the
# module-level `claim()` function, and `answer= split_ws(df["who"])` passes
# the whole column rather than row i's value — verify both are intended.
if val_string[i] == "]": break else: substr = substr + val_string[i] else: pass if len(substr)!= 0: when.append(substr) else: pass df['when'][j] = "".join(when) # else: # continue #----------FOR COLUMN "WHERE"------------# df['where'] = '' for j in range(len(df['modified'])): val_list = [] val_string = '' for k,v in df['modified'][j].items(): # print(type(v)) val_list.append(v) where = [] for indx in range(len(val_list)): val_string = val_list[indx] pos = val_string.find("where: ") substr = '' if pos != -1: for i in range(pos+7, len(val_string)): if val_string[i] == "]": break else: substr = substr + val_string[i] else: pass if len(substr)!= 0: where.append(substr) else: pass df['where'][j] = "".join(where) data=df[["claim","who","what","why","when","where"]].copy() return data #------------------------------------------------------------------------- def split_ws(input_list, delimiter=""): output_list = [] for item in input_list: split_item = item.split(delimiter) for sub_item in split_item: sub_item = sub_item.strip() if sub_item: output_list.append(sub_item) return output_list #-------------------------------------------------------------------------- def calc_rouge_l_score(list_of_evidence, list_of_ans): scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True) scores = scorer.score(' '.join(list_of_evidence), ' '.join(list_of_ans)) return scores['rougeL'].fmeasure #------------------------------------------------------------------------- def rephrase_question_who(question): if not question.lower().startswith("who"): words = question.split() words[0] = "Who" return " ".join(words) else: return question #------------------------------------------------------------------------ def gen_qa_who(df): list_of_ques_who=[] list_of_ans_who=[] list_of_evidence_answer_who=[] rouge_l_scores=[] for i,row in df.iterrows(): srl=df["who"][i] claim=df['claim'][i] answer= split_ws(df["who"]) evidence=df["evidence"][i] #time.sleep(10) if srl!="": try: for j in 
# Body of `gen_qa_who` (continues from the previous physical line).  For each
# candidate answer: generate a question, force the "Who" prefix, answer it
# from the evidence, keep the evidence answer only if it appears verbatim in
# the evidence text, then label by ROUGE-L: >= 0.2 -> 'Verified Valid',
# == 0 -> 'Not verifiable', otherwise 'Verified False'.
# NOTE(review): the bare `except: pass` swallows every error (API failures,
# KeyError on error JSON), so items silently go missing from the results.
# NOTE(review): the else-branch rebinds the result lists to plain strings
# ("No claims", "", "Not verifiable"), so the function's return types differ
# between the two paths — callers must handle both.
range(0,len(answer)): FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}""" #FACT_TO_GENERATE_QUESTION_FROM = f"""generate_who_based_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}""" #time.sleep(10) question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM, "num_beams":5, "early_stopping":True, "min_length": 100,"wait_for_model":True})[0]['generated_text'].capitalize() question_ids = rephrase_question_who(question_ids) list_of_ques_who.append(f"""Q{j+1}:{question_ids}""") list_of_ans_who.append(f"""Ans{j+1}:{answer[j]}""") input_evidence = f"answer_the_next_question_from_context: {question_ids} context: {evidence}" #time.sleep(10) answer_evidence = query_evidence({"inputs":input_evidence,"truncation":True,"wait_for_model":True})[0]['generated_text'] if answer_evidence.lower() in evidence.lower(): list_of_evidence_answer_who.append(f"""Evidence{j+1}:{answer_evidence}""") else: answer_evidence="" list_of_evidence_answer_who.append(f"""No mention of 'who'in any related documents.""") threshold = 0.2 list_of_pairs = [(answer_evidence, answer[j])] rouge_l_score = calc_rouge_l_score(answer_evidence, answer[j]) if rouge_l_score >= threshold: verification_status = 'Verified Valid' elif rouge_l_score == 0: verification_status = 'Not verifiable' else: verification_status = 'Verified False' rouge_l_scores.append(verification_status) except: pass else: list_of_ques_who="No claims" list_of_ans_who="" list_of_evidence_answer_who="No mention of 'who'in any related documents." 
# End of `gen_qa_who` (returns questions, answers, verdicts, evidence answers),
# then `rephrase_question_what` and the start of `gen_qa_what` — a copy of the
# "who" pipeline specialised to the "what" spans, with the same structure and
# the same caveats (bare except, whole-column split_ws call, list/str return).
rouge_l_scores="Not verifiable" return list_of_ques_who,list_of_ans_who,rouge_l_scores,list_of_evidence_answer_who #------------------------------------------------------------ def rephrase_question_what(question): if not question.lower().startswith("what"): words = question.split() words[0] = "What" return " ".join(words) else: return question #---------------------------------------------------------- def gen_qa_what(df): list_of_ques_what=[] list_of_ans_what=[] list_of_evidence_answer_what=[] rouge_l_scores=[] for i,row in df.iterrows(): srl=df["what"][i] claim=df['claim'][i] answer= split_ws(df["what"]) evidence=df["evidence"][i] #time.sleep(10) if srl!="": try: for j in range(0,len(answer)): FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}""" #time.sleep(10) question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM, "num_beams":5, "early_stopping":True, "min_length": 100,"wait_for_model":True})[0]['generated_text'].capitalize() question_ids = rephrase_question_what(question_ids) list_of_ques_what.append(f"""Q{j+1}:{question_ids}""") list_of_ans_what.append(f"""Ans{j+1}:{answer[j]}""") input_evidence = f"answer_the_next_question_from_context: {question_ids} context: {evidence}" #time.sleep(10) answer_evidence = query_evidence({"inputs":input_evidence,"truncation":True,"wait_for_model":True})[0]['generated_text'] if answer_evidence.lower() in evidence.lower(): list_of_evidence_answer_what.append(f"""Evidence{j+1}:{answer_evidence}""") else: answer_evidence="" list_of_evidence_answer_what.append(f"""No mention of 'what'in any related documents.""") threshold = 0.2 list_of_pairs = [(answer_evidence, answer[j])] rouge_l_score = calc_rouge_l_score(answer_evidence, answer[j]) if rouge_l_score >= threshold: verification_status = 'Verified Valid' elif rouge_l_score == 0: verification_status = 'Not verifiable' else: verification_status = 'Verified False' 
# Tail of `gen_qa_what`, then `rephrase_question_why` and most of
# `gen_qa_why` — the same pipeline specialised to the "why" spans.
rouge_l_scores.append(verification_status) except: pass else: list_of_ques_what="No claims" list_of_ans_what="" list_of_evidence_answer_what="No mention of 'what'in any related documents." rouge_l_scores="Not verifiable" return list_of_ques_what,list_of_ans_what,rouge_l_scores,list_of_evidence_answer_what #---------------------------------------------------------- def rephrase_question_why(question): if not question.lower().startswith("why"): words = question.split() words[0] = "Why" return " ".join(words) else: return question #--------------------------------------------------------- def gen_qa_why(df): list_of_ques_why=[] list_of_ans_why=[] list_of_evidence_answer_why=[] rouge_l_scores=[] for i,row in df.iterrows(): srl=df["why"][i] claim=df['claim'][i] answer= split_ws(df["why"]) evidence=df["evidence"][i] #time.sleep(10) if srl!="": try: for j in range(0,len(answer)): FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}""" #time.sleep(10) question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM, "num_beams":5, "early_stopping":True, "min_length": 100,"wait_for_model":True})[0]['generated_text'].capitalize() question_ids = rephrase_question_why(question_ids) list_of_ques_why.append(f"""Q{j+1}:{question_ids}""") list_of_ans_why.append(f"""Ans{j+1}:{answer[j]}""") input_evidence = f"answer_the_next_question_from_context: {question_ids} context: {evidence}" #time.sleep(10) answer_evidence = query_evidence({"inputs":input_evidence,"truncation":True,"wait_for_model":True})[0]['generated_text'] if answer_evidence.lower() in evidence.lower(): list_of_evidence_answer_why.append(f"""Evidence{j+1}:{answer_evidence}""") else: answer_evidence="" list_of_evidence_answer_why.append(f"""No mention of 'why'in any related documents.""") threshold = 0.2 list_of_pairs = [(answer_evidence, answer[j])] rouge_l_score = calc_rouge_l_score(answer_evidence, answer[j]) if rouge_l_score >= threshold: 
# Tail of `gen_qa_why`, then `rephrase_question_when` and most of
# `gen_qa_when` — the "when" specialisation of the same pipeline.
# NOTE(review): this physical line ends with the bare identifier `threshold`;
# its `= 0.2` continues on the next physical line — another lost newline that
# splits a single assignment statement in two.
verification_status = 'Verified Valid' elif rouge_l_score == 0: verification_status = 'Not verifiable' else: verification_status = 'Verified False' rouge_l_scores.append(verification_status) except: pass else: list_of_ques_why="No claims" list_of_ans_why="" list_of_evidence_answer_why="No mention of 'why'in any related documents." rouge_l_scores="Not verifiable" return list_of_ques_why,list_of_ans_why,rouge_l_scores,list_of_evidence_answer_why #--------------------------------------------------------- def rephrase_question_when(question): if not question.lower().startswith("when"): words = question.split() words[0] = "When" return " ".join(words) else: return question #--------------------------------------------------------- def gen_qa_when(df): list_of_ques_when=[] list_of_ans_when=[] list_of_evidence_answer_when=[] rouge_l_scores=[] for i,row in df.iterrows(): srl=df["when"][i] claim=df['claim'][i] answer= split_ws(df["when"]) evidence=df["evidence"][i] #time.sleep(10) if srl!="": try: for j in range(0,len(answer)): FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}""" #time.sleep(10) question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM, "num_beams":5, "early_stopping":True, "min_length": 100,"wait_for_model":True})[0]['generated_text'].capitalize() question_ids = rephrase_question_when(question_ids) list_of_ques_when.append(f"""Q{j+1}:{question_ids}""") list_of_ans_when.append(f"""Ans{j+1}:{answer[j]}""") input_evidence = f"answer_the_next_question_from_context: {question_ids} context: {evidence}" #time.sleep(10) answer_evidence = query_evidence({"inputs":input_evidence,"truncation":True,"wait_for_model":True})[0]['generated_text'] if answer_evidence.lower() in evidence.lower(): list_of_evidence_answer_when.append(f"""Evidence{j+1}:{answer_evidence}""") else: answer_evidence="" list_of_evidence_answer_when.append(f"""No mention of 'when'in any related documents.""") threshold 
# Completion of the split `threshold = 0.2` assignment, tail of `gen_qa_when`,
# then `rephrase_question_where` and most of `gen_qa_where` — the "where"
# specialisation of the same pipeline.
= 0.2 list_of_pairs = [(answer_evidence, answer[j])] rouge_l_score = calc_rouge_l_score(answer_evidence, answer[j]) if rouge_l_score >= threshold: verification_status = 'Verified Valid' elif rouge_l_score == 0: verification_status = 'Not verifiable' else: verification_status = 'Verified False' rouge_l_scores.append(verification_status) except: pass else: list_of_ques_when="No claims" list_of_ans_when="" list_of_evidence_answer_when="No mention of 'when'in any related documents." rouge_l_scores="Not verifiable" return list_of_ques_when,list_of_ans_when,rouge_l_scores,list_of_evidence_answer_when #------------------------------------------------------ def rephrase_question_where(question): if not question.lower().startswith("where"): words = question.split() words[0] = "Where" return " ".join(words) else: return question #------------------------------------------------------ def gen_qa_where(df): list_of_ques_where=[] list_of_ans_where=[] list_of_evidence_answer_where=[] rouge_l_scores=[] for i,row in df.iterrows(): srl=df["where"][i] claim=df['claim'][i] answer= split_ws(df["where"]) evidence=df["evidence"][i] #time.sleep(10) if srl!="": try: for j in range(0,len(answer)): FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}""" #time.sleep(10) question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM, "num_beams":5, "early_stopping":True, "min_length": 100,"wait_for_model":True})[0]['generated_text'].capitalize() question_ids = rephrase_question_where(question_ids) list_of_ques_where.append(f"""Q{j+1}:{question_ids}""") list_of_ans_where.append(f"""Ans{j+1}:{answer[j]}""") input_evidence = f"answer_the_next_question_from_context: {question_ids} context: {evidence}" #time.sleep(10) answer_evidence = query_evidence({"inputs":input_evidence,"truncation":True,"wait_for_model":True})[0]['generated_text'] if answer_evidence.lower() in evidence.lower(): 
# Tail of `gen_qa_where`, then the driver: once both claim and evidence are
# submitted, build the 5W DataFrame from the claim, attach the evidence, run
# all five gen_qa_* pipelines, and render the result with st.dataframe.
# NOTE(review): each gen_qa_* returns a 4-tuple of lists; assigning that tuple
# to a single DataFrame column stores its four elements as rows — confirm this
# presentation is intended rather than a column per element.
list_of_evidence_answer_where.append(f"""Evidence{j+1}:{answer_evidence}""") else: answer_evidence="" list_of_evidence_answer_where.append(f"""No mention of 'where'in any related documents.""") threshold = 0.2 list_of_pairs = [(answer_evidence, answer[j])] rouge_l_score = calc_rouge_l_score(answer_evidence, answer[j]) if rouge_l_score >= threshold: verification_status = 'Verified Valid' elif rouge_l_score == 0: verification_status = 'Not verifiable' else: verification_status = 'Verified False' rouge_l_scores.append(verification_status) except: pass else: list_of_ques_where="No claims" list_of_ans_where="" list_of_evidence_answer_where="No mention of 'where'in any related documents." rouge_l_scores="Not verifiable" return list_of_ques_where,list_of_ans_where,rouge_l_scores,list_of_evidence_answer_where #------------------------------------------------------ #------------------------------------------------------------ if claim_text: if evidence_text: df=claim(claim_text) df["evidence"]=evidence_text final_df = pd.DataFrame(columns=['Who Claims', 'What Claims', 'When Claims', 'Where Claims', 'Why Claims']) final_df["Who Claims"]=gen_qa_who(df) final_df["What Claims"]=gen_qa_what(df) final_df["When Claims"]=gen_qa_when(df) final_df["Where Claims"]=gen_qa_where(df) final_df["Why Claims"]=gen_qa_why(df) st.dataframe(final_df) # a,b=qa_evidence(final_data) # qa_evidence(final_data) # st.json(qa_evidence(final_data)) # st.json({'QA pair from claim':[{"Question": qu, "Answer": an} for qu, an in a], # 'QA pair from evidence':[{"Question": qu, "Answer": an} for qu, an in b]})