|
import streamlit as st

from transformers import pipeline

import re


import requests


# Hugging Face Inference API endpoint for question generation
# (ProphetNet fine-tuned on SQuAD QG).
API_URL = "https://api-inference.huggingface.co/models/microsoft/prophetnet-large-uncased-squad-qg"

# SECURITY NOTE(review): a real HF access token is committed to source.
# It should be revoked and loaded from st.secrets or an environment
# variable instead of being hardcoded here.
headers = {"Authorization": "Bearer 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"}
|
|
|
def query(payload):
    """POST *payload* to the question-generation inference API and return its JSON.

    No HTTP status check is performed: on API errors the returned JSON is an
    error dict rather than a generation list, so callers must tolerate both
    shapes.
    """
    # A timeout keeps a stalled inference endpoint from hanging the
    # Streamlit script run indefinitely.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    return response.json()
|
|
|
|
|
|
|
|
|
# Hugging Face Inference API endpoint used to answer questions against the
# evidence text (FLAN-T5 XXL).
API_URL_evidence ="https://api-inference.huggingface.co/models/google/flan-t5-xxl"

# SECURITY NOTE(review): same hardcoded HF token as above — revoke it and
# load from st.secrets / an environment variable.
headers_evidence = {"Authorization": "Bearer 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"}
|
|
|
def query_evidence(payload):
    """POST *payload* to the evidence QA inference API and return its JSON.

    Mirrors query(): no status check, so error payloads (dicts) can come
    back in place of a generation list and callers must cope with that.
    """
    # A timeout keeps a stalled inference endpoint from hanging the app.
    response = requests.post(API_URL_evidence, headers=headers_evidence, json=payload, timeout=120)
    return response.json()
|
|
|
|
|
# --- Page header and usage instructions -------------------------------------
st.title('Welcome to :blue[FACTIFY - 5WQA] ')

st.header('5W Aspect-based Fact Verification through Question Answering :blue[Web Demo]')

st.subheader('Here are a few steps to begin exploring and interacting with this demo.')

st.caption('First you need to input your claim and press :green[ctrl+enter].')

st.caption('Then you need to input your evidence and press :green[ctrl+enter].')

st.caption('Upon completing these two steps, kindly wait for a minute to receive the results.')

st.caption('Start by inputting the following instance of a claim and corresponding evidence into the designated text fields.')

# Worked examples the user can paste into the two text areas below.
st.caption('**Example 1**')

st.caption(''':green[Claim:] :point_right: Amazon announced on March 16 it would hire 100,000 new warehouse and delivery workers and

raise wages $ 2 per hour through April in response to increased demand for its services because of the coronavirus pandemic .''')

st.caption(''':green[Evidence:] :point_right: Due to the consumers increasingly relying on online retailers,

Amazon planned to hire over 99,000 workers in the warehouse and delivery sector during the Pandemic in the USA.''')

st.caption('**Example 2**')

st.caption(''':green[Claim:] :point_right: As of March 18 , 2020 , there were more than 77 confirmed

cases reported in the state of Virginia.''')

st.caption(''':green[Evidence:] :point_right: The Washington region’s total number of novel coronavirus cases grew to 203 on Wednesday.

Maryland added 23 cases Wednesday, bringing the state’s total to 86.

Virginia reported 10 more cases, for a total of 77, including the Washington region’s only two deaths.''')

# --- User inputs -------------------------------------------------------------
claim_text=st.text_area("Enter your claim:")

evidence_text=st.text_area("Enter your evidence:")

# Shown as soon as evidence is entered: warns about cold-start delays on the
# hosted inference API (models may return null until loaded).
if evidence_text:
    st.caption(':blue[At times, you may encounter null/none outputs, which could be a result of a delay in loading the models through the API. If you experience this problem, kindly try again after a few minutes.]')
|
|
|
|
|
# NOTE(review): these imports sit mid-file so the heavy AllenNLP dependency
# is only loaded once the script reaches this point; conventionally they
# belong at the top of the module.
import pandas as pd

import numpy as np

from allennlp.predictors.predictor import Predictor

import allennlp_models.tagging

# Loads the BERT-based semantic role labelling model from a local archive.
# This deserialisation is expensive and runs on every cold start of the app.
predictor = Predictor.from_path("structured-prediction-srl-bert.tar.gz")
|
|
|
|
|
def _remove_trail_comma(text):
    """Drop one trailing comma (plus any trailing whitespace) from *text*."""
    return re.sub(r",\s*$", "", text)


def _extract_role_spans(descriptions, label):
    """Pull the span tagged "<label>: " out of each SRL description.

    Each description looks like "[who: Amazon] announced [what: ...] ...".
    Returns one entry per description: the text between "<label>: " and the
    next "]" (or the end of the string if unterminated), or None when the
    tag is absent.
    """
    marker = label + ": "
    spans = []
    for desc in descriptions:
        pos = desc.find(marker)
        if pos == -1:
            spans.append(None)
            continue
        start = pos + len(marker)
        end = desc.find("]", start)
        spans.append(desc[start:] if end == -1 else desc[start:end])
    return spans


def _flatten_w_column(series):
    """Collapse a list-of-spans column into a comma separated display string.

    Applies the same normalisation steps, in the same order, as the original
    per-column clean-up: spacing fixes around possessives and dashes, removal
    of quotes and list brackets (left over from str() on a Python list),
    trimming stray commas, and erasing "None" placeholders.
    """
    def _fmt(value):
        text = str(value)
        text = text.replace(" 's", "'s")
        text = text.replace("s ’", "s’")
        text = text.replace(" - ", "-")
        text = text.replace("'", "")
        text = text.replace("[", "").replace("]", "")
        text = text.rstrip(",").lstrip(",")
        text = text.replace("None,", "").replace("None", "")
        return _remove_trail_comma(text)

    return series.apply(_fmt)


# The five aspect labels, in output-column order.
_W_LABELS = ("who", "what", "why", "when", "where")


def claim(text):
    """Extract 5W aspects (who/what/why/when/where) from a claim via SRL.

    Runs the AllenNLP BERT SRL model over *text*, rewrites each verb frame's
    argument tags into 5W labels, and returns a single-row DataFrame with
    columns ["claim", "who", "what", "why", "when", "where"], where every W
    column is a comma separated string of the spans found (empty if none).
    """
    df = pd.DataFrame({'claim': [text]})

    # Strip thousands separators ("100,000" -> "100000") so the comma-based
    # splitting applied later does not break numbers apart.
    df['claim'] = df['claim'].apply(
        lambda x: re.sub(r'(\d+),(\d+)', lambda m: m.group(1) + m.group(2), x))

    def _srl(sentence):
        # AllenNLP can raise IndexError on degenerate input; treat that as
        # "no parse" instead of crashing the app.
        try:
            return predictor.predict(sentence=sentence)
        except IndexError:
            return None

    # Map SRL argument tags onto 5W labels inside each verb's description,
    # e.g. "[ARG0: Amazon]" -> "[who: Amazon]".
    replacements = (
        ("ARG0", "who"),
        ("ARG1", "what"),
        ("ARGM-TMP", "when"),
        ("ARGM-LOC", "where"),
        ("ARGM-CAU", "why"),
    )

    modified_per_row = []
    for srl in df['claim'].apply(_srl):
        # Keyed by verb lemma, so a repeated verb keeps only its last frame.
        # NOTE(review): preserved from the original implementation — confirm
        # whether dropping earlier duplicate-verb frames is intended.
        per_verb = {}
        for verb_frame in (srl['verbs'] if srl else []):
            desc = verb_frame['description']
            for old, new in replacements:
                desc = desc.replace(old, new)
            per_verb[verb_frame['verb']] = desc
        modified_per_row.append(list(per_verb.values()))

    for label in _W_LABELS:
        df[label] = [_extract_role_spans(descs, label) for descs in modified_per_row]

    data = df[['claim', 'who', 'what', 'why', 'when', 'where']].copy()

    # The claim itself only needs quote and bracket stripping.
    data['claim'] = data['claim'].apply(
        lambda x: str(x).replace('\'', '').replace('[', '').replace(']', ''))

    for label in _W_LABELS:
        data[label] = _flatten_w_column(data[label])

    return data
|
|
|
def split_ws(input_list):
    """Split each comma separated string in *input_list* into trimmed parts.

    Double-quoted segments are kept intact (quotes included), so a quoted
    span containing commas is not split. Returns one flat list with the
    stripped pieces of every input item, in order.
    """
    pieces = []
    for item in input_list:
        # Each match is either a run with no quote/comma, or a whole
        # quoted segment.
        pieces.extend(re.findall(r'[^",]+|"[^"]*"', item))
    return [piece.strip() for piece in pieces]
|
|
|
|
|
def gen_qq(df):
    """Generate one question per extracted 5W answer span.

    For each W column of *df* (built by claim(), plus an "evidence" column),
    calls the question-generation API with "<answer> [SEP] <claim>".

    Returns four parallel lists: (claims, generated questions, answer spans,
    evidence strings).
    """
    w_list = ["who", "when", "where", "what", "why"]
    answers = []
    claims = []
    questions = []
    evidences = []
    for value in w_list:
        for i, _row in df.iterrows():
            srl = df[value][i]
            row_claim = df['claim'][i]
            row_evidence = df['evidence'][i]
            # NOTE(review): splits the whole column rather than only row i —
            # equivalent here because the app builds a single-row frame.
            candidate_answers = split_ws(df[value])
            try:
                # Skip columns whose flattened string is empty/whitespace.
                if len(srl.split()) > 0 and len(srl.split(",")) > 0:
                    for span in candidate_answers:
                        prompt = f"""{span} [SEP] {row_claim}"""
                        question_ids = query({"inputs": prompt,
                                              "num_beams": 5,
                                              "early_stopping": True})
                        claims.append(row_claim)
                        answers.append(span)
                        questions.append(question_ids[0]['generated_text'].capitalize())
                        evidences.append(row_evidence)
            except Exception:
                # Best effort: a failed or garbled API response (error dict
                # instead of a generation list, missing 'generated_text')
                # simply drops this span rather than aborting the run.
                pass
    return claims, questions, answers, evidences
|
|
|
def qa_evidence(final_data):
    """Answer each generated question against its evidence via the QA API.

    *final_data* has columns "gen_question", "evidence" and "actual_answer"
    (the SRL span the question was generated from).

    Returns two lists of (question, answer) pairs: answers taken from the
    claim's SRL spans, and answers produced by the evidence QA model.
    """
    questions = []
    claim_answers = []
    evidence_answers = []

    for i, _row in final_data.iterrows():
        question = final_data['gen_question'][i]
        evidence = final_data['evidence'][i]
        srl_answer = final_data['actual_answer'][i]

        payload = f"question: {question} context: {evidence}"
        # Will raise (KeyError/IndexError) on an API error payload; callers
        # see the Streamlit exception, as in the original implementation.
        answer = query_evidence({
            "inputs": payload,
            "truncation": True})

        questions.append(question)
        claim_answers.append(srl_answer)
        evidence_answers.append(answer[0]["generated_text"])

    return list(zip(questions, claim_answers)), list(zip(questions, evidence_answers))
|
|
|
|
|
|
|
# --- Main pipeline: runs once both text areas are filled ---------------------
if claim_text:
    if evidence_text:
        # 1) SRL-based 5W extraction from the claim.
        df=claim(claim_text)
        df["evidence"]=evidence_text
        # 2) Generate one question per extracted answer span.
        actual_claim,gen_question,actual_answer,evidence=gen_qq(df)
        final_data=pd.DataFrame([actual_claim,gen_question,actual_answer,evidence]).T
        final_data.columns=["actual_claim","gen_question","actual_answer","evidence"]
        # 3) Answer every question against the evidence and render both
        #    QA-pair sets side by side as JSON.
        a,b=qa_evidence(final_data)
        st.json({'QA pair from claim':[{"Question": qu, "Answer": an} for qu, an in a],
                 'QA pair from evidence':[{"Question": qu, "Answer": an} for qu, an in b]})