5WQA / app.py
Towhidul's picture
Update app.py
718d231
raw
history blame
26.8 kB
import streamlit as st
from transformers import pipeline
import re
import time
import requests
# Question-generation endpoint: ProphetNet fine-tuned for QG on SQuAD.
API_URL = "https://api-inference.huggingface.co/models/microsoft/prophetnet-large-uncased-squad-qg"
# SECURITY NOTE(review): a live bearer token is hard-coded in the source; it
# should be rotated and loaded from an environment variable / Streamlit secret.
headers = {"Authorization": "Bearer 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"}


def query(payload):
    """POST *payload* to the question-generation endpoint and return the decoded JSON."""
    resp = requests.post(API_URL, headers=headers, json=payload)
    return resp.json()
#-----------------------------------------------------------
# Evidence question-answering endpoint: FLAN-T5-XXL hosted inference.
API_URL_evidence = "https://api-inference.huggingface.co/models/google/flan-t5-xxl"
# SECURITY NOTE(review): same hard-coded bearer token as above; move to a secret.
headers_evidence = {"Authorization": "Bearer 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"}


def query_evidence(payload):
    """POST *payload* to the evidence-QA endpoint and return the decoded JSON."""
    resp = requests.post(API_URL_evidence, headers=headers_evidence, json=payload)
    return resp.json()
#-----------------------------------------------------------
# --- Static page header and usage instructions -------------------------------
st.title('Welcome to :blue[FACTIFY - 5WQA] ')
st.header('5W Aspect-based Fact Verification through Question Answering :blue[Web Demo]')
st.subheader('Here are a few steps to begin exploring and interacting with this demo.')
st.caption('First you need to input your claim.')
st.caption('Then you need to input your evidence and press **:green[Submit]**.')
st.caption('Upon completing these two steps, kindly wait for a minute to receive the results.')
st.caption('Start by inputting the following instance of a claim and corresponding evidence into the designated text fields.')
# --- Worked claim/evidence examples the user can paste into the form ---------
st.caption('**Example 1**')
st.caption(''':green[Claim:] :point_right: Amazon announced on March 16 it would hire 100,000 new warehouse and delivery workers and
raise wages $ 2 per hour through April in response to increased demand for its services because of the coronavirus pandemic .''')
st.caption(''':green[Evidence:] :point_right: Due to the consumers increasingly relying on online retailers,
Amazon planned to hire over 99,000 workers in the warehouse and delivery sector during the Pandemic in the USA.''')
st.caption('**Example 2**')
st.caption(''':green[Claim:] :point_right: In China, Buddhist monks and nuns lived together in places such as the Yunnan monastery.''')
st.caption(''':green[Evidence:] :point_right: Monastics in Japan are particularly exceptional in the Buddhist tradition because the monks and nuns can marry after receiving their higher ordination . ''')
st.caption('**Example 3**')
st.caption(''':green[Claim:] :point_right: In Batman, Penguin hydrates the henchmen with water contaminated with atomic waste.''')
st.caption(''':green[Evidence:] :point_right: And Penguin even schemes his way into the Batcave along with five dehydrated henchmen ;
this plan fails when the henchmen are unexpectedly killed
when he mistakenly rehydrates them with heavy water contaminated with atomic waste ,
regularly used to recharge the Batcave s atomic pile . ''')
def proc():
    """Echo the current claim text (session-state key 'text_key') back onto the page."""
    current_text = st.session_state.text_key
    st.write(current_text)
# --- Input widgets -----------------------------------------------------------
# Claim is a plain text area; its current value is echoed via the on_change
# callback and read directly on every rerun.
claim_text = st.text_area("Enter your claim:", on_change=proc, key='text_key')

# Evidence goes through a form so the heavy pipeline only runs after an
# explicit Submit.
form_evidence = st.form(key='my_evidence')
# BUG FIX: the original discarded text_input's return value and bound
# `evidence_text` to the submit button's bool, so the pipeline downstream
# received `True` instead of the evidence string. Bind the entered text here;
# inside a form the value only updates on submit, and it is "" (falsy) before
# the first submit, so the `if evidence_text:` gating below is preserved.
evidence_text = form_evidence.text_input(label='Enter your evidence')
form_evidence.form_submit_button(label='Submit')

if evidence_text:
    st.caption(':green[Kindly hold on for a few minutes while the QA pairs are being generated]')
    st.caption(':blue[At times, you may encounter null/none outputs, which could be a result of a delay in loading the models through the API. If you experience this problem, kindly try again after a few minutes.]')
    # Heavy dependencies and the SRL model are loaded lazily, only once the
    # user has actually submitted evidence.
    import pandas as pd
    from rouge_score import rouge_scorer
    import numpy as np
    from allennlp.predictors.predictor import Predictor
    import allennlp_models.tagging
    predictor = Predictor.from_path("structured-prediction-srl-bert.tar.gz")
#---------------------------------------------------------------
def _remove_special_chars(text):
    """Strip punctuation/special characters unless they sit between digits
    (keeps values like '99,000' or '2.5' intact)."""
    return re.sub(r'(?<!\d)[^\w\s]+(?!\d)', '', text)


def _extract_role(descriptions, label):
    """Collect every '<label>: ...]' span from a list of SRL description strings.

    Only the first occurrence of the marker in each description is used
    (mirrors the original str.find-based scan); the span runs until the next
    ']' or the end of the string, and empty spans are dropped.  Returns the
    surviving spans joined with the '<sep>' delimiter ('' if none).
    """
    marker = label + ": "
    spans = []
    for desc in descriptions:
        pos = desc.find(marker)
        if pos == -1:
            continue
        rest = desc[pos + len(marker):]
        end = rest.find("]")
        span = rest if end == -1 else rest[:end]
        if span:
            spans.append(span)
    return "<sep>".join(spans)


def claim(text):
    """Run SRL over *text* and extract 5W (who/what/why/when/where) spans.

    Returns a one-row DataFrame with columns
    ['claim', 'who', 'what', 'why', 'when', 'where']; each 5W cell is a
    '<sep>'-joined string of the spans found for that role ('' when none).
    Relies on the module-level AllenNLP `predictor`.

    Refactors the original: the five near-identical per-role extraction loops
    are collapsed into `_extract_role`, and pandas chained assignments
    (df['col'][i] = ...) are replaced with `.at`, which is the supported way
    to set a single cell and avoids SettingWithCopy behavior.
    """
    # SRL argument tags mapped onto 5W question words inside each
    # verb's description string.
    replacements = (
        ("ARG0", "who"),
        ("ARG1", "what"),
        ("ARGM-TMP", "when"),
        ("ARGM-LOC", "where"),
        ("ARGM-CAU", "why"),
    )

    def srl_allennlp(sent):
        # Returns None when AllenNLP raises IndexError (no predicate found),
        # matching the original silent fallback.
        try:
            return predictor.predict(sentence=sent)
        except IndexError:
            return None

    df = pd.DataFrame({'claim': _remove_special_chars(text)}, index=[0])
    df['allennlp_srl'] = df['claim'].apply(srl_allennlp)
    df['number_of_verbs'] = ''
    df['verbs_group'] = ''
    df['words'] = ''
    df['verbs'] = ''
    df['modified'] = ''

    col1 = df['allennlp_srl']
    for i in range(len(col1)):
        verbs = col1[i]['verbs']
        df.at[i, 'number_of_verbs'] = len(verbs)
        df.at[i, 'verbs_group'] = verbs
        df.at[i, 'words'] = col1[i]['words']
        df.at[i, 'verbs'] = [v['verb'] for v in verbs]
        # NOTE: keyed by verb string, so a repeated verb overwrites the earlier
        # description -- preserved from the original implementation.
        verb_dict = {}
        for v in verbs:
            desc = v['description']
            for old, new in replacements:
                desc = desc.replace(old, new)
            verb_dict[v['verb']] = desc
        df.at[i, 'modified'] = verb_dict

    # One pass per 5W role instead of five copy-pasted loops.
    for role in ("who", "what", "why", "when", "where"):
        df[role] = ''
        for j in range(len(df['modified'])):
            df.at[j, role] = _extract_role(list(df['modified'][j].values()), role)

    return df[["claim", "who", "what", "why", "when", "where"]].copy()
#-------------------------------------------------------------------------
def split_ws(input_list, delimiter="<sep>"):
    """Split every item of *input_list* on *delimiter*, strip whitespace from
    each piece, and return the non-empty pieces in order."""
    return [
        part
        for item in input_list
        for part in (chunk.strip() for chunk in item.split(delimiter))
        if part
    ]
#--------------------------------------------------------------------------
def calc_rouge_l_score(list_of_evidence, list_of_ans):
    """Return the ROUGE-L f-measure between the space-joined evidence and
    answer sequences.

    NOTE(review): callers pass plain strings; ' '.join then space-separates
    individual characters -- confirm that is the intended granularity.
    """
    joined_evidence = ' '.join(list_of_evidence)
    joined_answers = ' '.join(list_of_ans)
    rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    return rouge.score(joined_evidence, joined_answers)['rougeL'].fmeasure
#-------------------------------------------------------------------------
def rephrase_question_who(question):
    """Force *question* to start with 'Who'.

    The first word is replaced with 'Who' unless the question already starts
    with 'who' (case-insensitive). Fixes the original's IndexError on empty
    or whitespace-only input by returning such input unchanged.
    """
    words = question.split()
    if not words or question.lower().startswith("who"):
        return question
    words[0] = "Who"
    return " ".join(words)
#------------------------------------------------------------------------
def gen_qa_who(df):
    """Generate 'who' QA pairs per claim row and verify them against the evidence.

    For each non-empty 'who' SRL span: generate a question from the claim via
    the QG API (`query`), answer it from the evidence via the QA API
    (`query_evidence`), and attach a ROUGE-L-based verification label.

    Returns (questions, answers, verification_statuses, evidence_answers).
    NOTE(review): on the empty-'who' branch these names are rebound to plain
    strings, so the return types differ between branches.
    """
    list_of_ques_who=[]
    list_of_ans_who=[]
    list_of_evidence_answer_who=[]
    rouge_l_scores=[]
    for i,row in df.iterrows():
        srl=df["who"][i]
        claim=df['claim'][i]
        # NOTE(review): passes the whole 'who' column, not just row i's cell,
        # so spans from every row are used for each row -- confirm intended.
        answer= split_ws(df["who"])
        evidence=df["evidence"][i]
        #time.sleep(10)
        if srl!="":
            try:
                for j in range(0,len(answer)):
                    # Prompt for the QG model: target answer + claim as context.
                    FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}"""
                    #FACT_TO_GENERATE_QUESTION_FROM = f"""generate_who_based_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}"""
                    #time.sleep(10)
                    question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM,
                        "num_beams":5,
                        "early_stopping":True,
                        "min_length": 100,"wait_for_model":True})[0]['generated_text'].capitalize()
                    question_ids = rephrase_question_who(question_ids)
                    list_of_ques_who.append(f"""Q{j+1}:{question_ids}""")
                    list_of_ans_who.append(f"""Ans{j+1}:{answer[j]}""")
                    # Answer the generated question from the user-supplied evidence.
                    input_evidence = f"answer_the_next_question_from_context: {question_ids} context: {evidence}"
                    #time.sleep(10)
                    answer_evidence = query_evidence({"inputs":input_evidence,"truncation":True,"wait_for_model":True})[0]['generated_text']
                    # Keep the model's answer only if it literally occurs in the evidence.
                    if answer_evidence.lower() in evidence.lower():
                        list_of_evidence_answer_who.append(f"""Evidence{j+1}:{answer_evidence}""")
                    else:
                        answer_evidence=""
                        list_of_evidence_answer_who.append(f"""No mention of 'who'in any related documents.""")
                    # ROUGE-L f-measure between evidence answer and claim span
                    # decides the label (>= 0.2 valid, 0 unverifiable, else false).
                    threshold = 0.2
                    list_of_pairs = [(answer_evidence, answer[j])]  # NOTE(review): unused
                    rouge_l_score = calc_rouge_l_score(answer_evidence, answer[j])
                    if rouge_l_score >= threshold:
                        verification_status = 'Verified Valid'
                    elif rouge_l_score == 0:
                        verification_status = 'Not verifiable'
                    else:
                        verification_status = 'Verified False'
                    rouge_l_scores.append(verification_status)
            # NOTE(review): bare except silently swallows API/JSON/key errors.
            except:
                pass
        else:
            list_of_ques_who="No claims"
            list_of_ans_who=""
            list_of_evidence_answer_who="No mention of 'who'in any related documents."
            rouge_l_scores="Not verifiable"
    return list_of_ques_who,list_of_ans_who,rouge_l_scores,list_of_evidence_answer_who
#------------------------------------------------------------
def rephrase_question_what(question):
    """Force *question* to start with 'What'.

    The first word is replaced with 'What' unless the question already starts
    with 'what' (case-insensitive). Fixes the original's IndexError on empty
    or whitespace-only input by returning such input unchanged.
    """
    words = question.split()
    if not words or question.lower().startswith("what"):
        return question
    words[0] = "What"
    return " ".join(words)
#----------------------------------------------------------
def gen_qa_what(df):
    """Generate 'what' QA pairs per claim row and verify them against the evidence.

    Same pipeline as the other gen_qa_* functions: QG API for the question,
    QA API for the evidence answer, ROUGE-L for the verification label.

    Returns (questions, answers, verification_statuses, evidence_answers).
    NOTE(review): on the empty-'what' branch these names are rebound to plain
    strings, so the return types differ between branches.
    """
    list_of_ques_what=[]
    list_of_ans_what=[]
    list_of_evidence_answer_what=[]
    rouge_l_scores=[]
    for i,row in df.iterrows():
        srl=df["what"][i]
        claim=df['claim'][i]
        # NOTE(review): passes the whole 'what' column, not just row i's cell.
        answer= split_ws(df["what"])
        evidence=df["evidence"][i]
        #time.sleep(10)
        if srl!="":
            try:
                for j in range(0,len(answer)):
                    # Prompt for the QG model: target answer + claim as context.
                    FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}"""
                    #time.sleep(10)
                    question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM,
                        "num_beams":5,
                        "early_stopping":True,
                        "min_length": 100,"wait_for_model":True})[0]['generated_text'].capitalize()
                    question_ids = rephrase_question_what(question_ids)
                    list_of_ques_what.append(f"""Q{j+1}:{question_ids}""")
                    list_of_ans_what.append(f"""Ans{j+1}:{answer[j]}""")
                    # Answer the generated question from the user-supplied evidence.
                    input_evidence = f"answer_the_next_question_from_context: {question_ids} context: {evidence}"
                    #time.sleep(10)
                    answer_evidence = query_evidence({"inputs":input_evidence,"truncation":True,"wait_for_model":True})[0]['generated_text']
                    # Keep the model's answer only if it literally occurs in the evidence.
                    if answer_evidence.lower() in evidence.lower():
                        list_of_evidence_answer_what.append(f"""Evidence{j+1}:{answer_evidence}""")
                    else:
                        answer_evidence=""
                        list_of_evidence_answer_what.append(f"""No mention of 'what'in any related documents.""")
                    # ROUGE-L f-measure decides the verification label.
                    threshold = 0.2
                    list_of_pairs = [(answer_evidence, answer[j])]  # NOTE(review): unused
                    rouge_l_score = calc_rouge_l_score(answer_evidence, answer[j])
                    if rouge_l_score >= threshold:
                        verification_status = 'Verified Valid'
                    elif rouge_l_score == 0:
                        verification_status = 'Not verifiable'
                    else:
                        verification_status = 'Verified False'
                    rouge_l_scores.append(verification_status)
            # NOTE(review): bare except silently swallows API/JSON/key errors.
            except:
                pass
        else:
            list_of_ques_what="No claims"
            list_of_ans_what=""
            list_of_evidence_answer_what="No mention of 'what'in any related documents."
            rouge_l_scores="Not verifiable"
    return list_of_ques_what,list_of_ans_what,rouge_l_scores,list_of_evidence_answer_what
#----------------------------------------------------------
def rephrase_question_why(question):
    """Force *question* to start with 'Why'.

    The first word is replaced with 'Why' unless the question already starts
    with 'why' (case-insensitive). Fixes the original's IndexError on empty
    or whitespace-only input by returning such input unchanged.
    """
    words = question.split()
    if not words or question.lower().startswith("why"):
        return question
    words[0] = "Why"
    return " ".join(words)
#---------------------------------------------------------
def gen_qa_why(df):
    """Generate 'why' QA pairs per claim row and verify them against the evidence.

    Same pipeline as the other gen_qa_* functions: QG API for the question,
    QA API for the evidence answer, ROUGE-L for the verification label.

    Returns (questions, answers, verification_statuses, evidence_answers).
    NOTE(review): on the empty-'why' branch these names are rebound to plain
    strings, so the return types differ between branches.
    """
    list_of_ques_why=[]
    list_of_ans_why=[]
    list_of_evidence_answer_why=[]
    rouge_l_scores=[]
    for i,row in df.iterrows():
        srl=df["why"][i]
        claim=df['claim'][i]
        # NOTE(review): passes the whole 'why' column, not just row i's cell.
        answer= split_ws(df["why"])
        evidence=df["evidence"][i]
        #time.sleep(10)
        if srl!="":
            try:
                for j in range(0,len(answer)):
                    # Prompt for the QG model: target answer + claim as context.
                    FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}"""
                    #time.sleep(10)
                    question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM,
                        "num_beams":5,
                        "early_stopping":True,
                        "min_length": 100,"wait_for_model":True})[0]['generated_text'].capitalize()
                    question_ids = rephrase_question_why(question_ids)
                    list_of_ques_why.append(f"""Q{j+1}:{question_ids}""")
                    list_of_ans_why.append(f"""Ans{j+1}:{answer[j]}""")
                    # Answer the generated question from the user-supplied evidence.
                    input_evidence = f"answer_the_next_question_from_context: {question_ids} context: {evidence}"
                    #time.sleep(10)
                    answer_evidence = query_evidence({"inputs":input_evidence,"truncation":True,"wait_for_model":True})[0]['generated_text']
                    # Keep the model's answer only if it literally occurs in the evidence.
                    if answer_evidence.lower() in evidence.lower():
                        list_of_evidence_answer_why.append(f"""Evidence{j+1}:{answer_evidence}""")
                    else:
                        answer_evidence=""
                        list_of_evidence_answer_why.append(f"""No mention of 'why'in any related documents.""")
                    # ROUGE-L f-measure decides the verification label.
                    threshold = 0.2
                    list_of_pairs = [(answer_evidence, answer[j])]  # NOTE(review): unused
                    rouge_l_score = calc_rouge_l_score(answer_evidence, answer[j])
                    if rouge_l_score >= threshold:
                        verification_status = 'Verified Valid'
                    elif rouge_l_score == 0:
                        verification_status = 'Not verifiable'
                    else:
                        verification_status = 'Verified False'
                    rouge_l_scores.append(verification_status)
            # NOTE(review): bare except silently swallows API/JSON/key errors.
            except:
                pass
        else:
            list_of_ques_why="No claims"
            list_of_ans_why=""
            list_of_evidence_answer_why="No mention of 'why'in any related documents."
            rouge_l_scores="Not verifiable"
    return list_of_ques_why,list_of_ans_why,rouge_l_scores,list_of_evidence_answer_why
#---------------------------------------------------------
def rephrase_question_when(question):
    """Force *question* to start with 'When'.

    The first word is replaced with 'When' unless the question already starts
    with 'when' (case-insensitive). Fixes the original's IndexError on empty
    or whitespace-only input by returning such input unchanged.
    """
    words = question.split()
    if not words or question.lower().startswith("when"):
        return question
    words[0] = "When"
    return " ".join(words)
#---------------------------------------------------------
def gen_qa_when(df):
    """Generate 'when' QA pairs per claim row and verify them against the evidence.

    Same pipeline as the other gen_qa_* functions: QG API for the question,
    QA API for the evidence answer, ROUGE-L for the verification label.

    Returns (questions, answers, verification_statuses, evidence_answers).
    NOTE(review): on the empty-'when' branch these names are rebound to plain
    strings, so the return types differ between branches.
    """
    list_of_ques_when=[]
    list_of_ans_when=[]
    list_of_evidence_answer_when=[]
    rouge_l_scores=[]
    for i,row in df.iterrows():
        srl=df["when"][i]
        claim=df['claim'][i]
        # NOTE(review): passes the whole 'when' column, not just row i's cell.
        answer= split_ws(df["when"])
        evidence=df["evidence"][i]
        #time.sleep(10)
        if srl!="":
            try:
                for j in range(0,len(answer)):
                    # Prompt for the QG model: target answer + claim as context.
                    FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}"""
                    #time.sleep(10)
                    question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM,
                        "num_beams":5,
                        "early_stopping":True,
                        "min_length": 100,"wait_for_model":True})[0]['generated_text'].capitalize()
                    question_ids = rephrase_question_when(question_ids)
                    list_of_ques_when.append(f"""Q{j+1}:{question_ids}""")
                    list_of_ans_when.append(f"""Ans{j+1}:{answer[j]}""")
                    # Answer the generated question from the user-supplied evidence.
                    input_evidence = f"answer_the_next_question_from_context: {question_ids} context: {evidence}"
                    #time.sleep(10)
                    answer_evidence = query_evidence({"inputs":input_evidence,"truncation":True,"wait_for_model":True})[0]['generated_text']
                    # Keep the model's answer only if it literally occurs in the evidence.
                    if answer_evidence.lower() in evidence.lower():
                        list_of_evidence_answer_when.append(f"""Evidence{j+1}:{answer_evidence}""")
                    else:
                        answer_evidence=""
                        list_of_evidence_answer_when.append(f"""No mention of 'when'in any related documents.""")
                    # ROUGE-L f-measure decides the verification label.
                    threshold = 0.2
                    list_of_pairs = [(answer_evidence, answer[j])]  # NOTE(review): unused
                    rouge_l_score = calc_rouge_l_score(answer_evidence, answer[j])
                    if rouge_l_score >= threshold:
                        verification_status = 'Verified Valid'
                    elif rouge_l_score == 0:
                        verification_status = 'Not verifiable'
                    else:
                        verification_status = 'Verified False'
                    rouge_l_scores.append(verification_status)
            # NOTE(review): bare except silently swallows API/JSON/key errors.
            except:
                pass
        else:
            list_of_ques_when="No claims"
            list_of_ans_when=""
            list_of_evidence_answer_when="No mention of 'when'in any related documents."
            rouge_l_scores="Not verifiable"
    return list_of_ques_when,list_of_ans_when,rouge_l_scores,list_of_evidence_answer_when
#------------------------------------------------------
def rephrase_question_where(question):
    """Force *question* to start with 'Where'.

    The first word is replaced with 'Where' unless the question already starts
    with 'where' (case-insensitive). Fixes the original's IndexError on empty
    or whitespace-only input by returning such input unchanged.
    """
    words = question.split()
    if not words or question.lower().startswith("where"):
        return question
    words[0] = "Where"
    return " ".join(words)
#------------------------------------------------------
def gen_qa_where(df):
    """Generate 'where' QA pairs per claim row and verify them against the evidence.

    Same pipeline as the other gen_qa_* functions: QG API for the question,
    QA API for the evidence answer, ROUGE-L for the verification label.

    Returns (questions, answers, verification_statuses, evidence_answers).
    NOTE(review): on the empty-'where' branch these names are rebound to plain
    strings, so the return types differ between branches.
    """
    list_of_ques_where=[]
    list_of_ans_where=[]
    list_of_evidence_answer_where=[]
    rouge_l_scores=[]
    for i,row in df.iterrows():
        srl=df["where"][i]
        claim=df['claim'][i]
        # NOTE(review): passes the whole 'where' column, not just row i's cell.
        answer= split_ws(df["where"])
        evidence=df["evidence"][i]
        #time.sleep(10)
        if srl!="":
            try:
                for j in range(0,len(answer)):
                    # Prompt for the QG model: target answer + claim as context.
                    FACT_TO_GENERATE_QUESTION_FROM = f"""generate_the_question_from_context_using_the_next_answer:{answer[j]} [SEP] context:{claim}"""
                    #time.sleep(10)
                    question_ids = query({"inputs":FACT_TO_GENERATE_QUESTION_FROM,
                        "num_beams":5,
                        "early_stopping":True,
                        "min_length": 100,"wait_for_model":True})[0]['generated_text'].capitalize()
                    question_ids = rephrase_question_where(question_ids)
                    list_of_ques_where.append(f"""Q{j+1}:{question_ids}""")
                    list_of_ans_where.append(f"""Ans{j+1}:{answer[j]}""")
                    # Answer the generated question from the user-supplied evidence.
                    input_evidence = f"answer_the_next_question_from_context: {question_ids} context: {evidence}"
                    #time.sleep(10)
                    answer_evidence = query_evidence({"inputs":input_evidence,"truncation":True,"wait_for_model":True})[0]['generated_text']
                    # Keep the model's answer only if it literally occurs in the evidence.
                    if answer_evidence.lower() in evidence.lower():
                        list_of_evidence_answer_where.append(f"""Evidence{j+1}:{answer_evidence}""")
                    else:
                        answer_evidence=""
                        list_of_evidence_answer_where.append(f"""No mention of 'where'in any related documents.""")
                    # ROUGE-L f-measure decides the verification label.
                    threshold = 0.2
                    list_of_pairs = [(answer_evidence, answer[j])]  # NOTE(review): unused
                    rouge_l_score = calc_rouge_l_score(answer_evidence, answer[j])
                    if rouge_l_score >= threshold:
                        verification_status = 'Verified Valid'
                    elif rouge_l_score == 0:
                        verification_status = 'Not verifiable'
                    else:
                        verification_status = 'Verified False'
                    rouge_l_scores.append(verification_status)
            # NOTE(review): bare except silently swallows API/JSON/key errors.
            except:
                pass
        else:
            list_of_ques_where="No claims"
            list_of_ans_where=""
            list_of_evidence_answer_where="No mention of 'where'in any related documents."
            rouge_l_scores="Not verifiable"
    return list_of_ques_where,list_of_ans_where,rouge_l_scores,list_of_evidence_answer_where
#------------------------------------------------------
#------------------------------------------------------------
# Entry point of the pipeline: runs once both the claim text and the evidence
# form have been submitted.
if claim_text:
    if evidence_text:
        # Extract 5W spans from the claim and attach the evidence column.
        df=claim(claim_text)
        # NOTE(review): confirm evidence_text carries the evidence string here
        # rather than the form's submit state -- see the form wiring above.
        df["evidence"]=evidence_text
        final_df = pd.DataFrame(columns=['Who Claims', 'What Claims', 'When Claims', 'Where Claims', 'Why Claims'])
        # NOTE(review): each gen_qa_* call returns a 4-tuple; assigning a
        # length-4 sequence to a column of a 0-row DataFrame raises ValueError
        # on recent pandas -- confirm the pandas version this targets.
        final_df["Who Claims"]=gen_qa_who(df)
        final_df["What Claims"]=gen_qa_what(df)
        final_df["When Claims"]=gen_qa_when(df)
        final_df["Where Claims"]=gen_qa_where(df)
        final_df["Why Claims"]=gen_qa_why(df)
        st.dataframe(final_df)
        # a,b=qa_evidence(final_data)
        # qa_evidence(final_data)
        # st.json(qa_evidence(final_data))
        # st.json({'QA pair from claim':[{"Question": qu, "Answer": an} for qu, an in a],
        # 'QA pair from evidence':[{"Question": qu, "Answer": an} for qu, an in b]})