"""Streamlit web demo for FACTIFY-5WQA.

The script takes a claim and a piece of evidence from the user, runs
semantic role labelling (SRL) on the claim to pull out who/what/why/when/
where spans, asks a hosted question-generation model for a question about
each span, and then asks a hosted QA model to answer each question from the
evidence.  Both sets of question/answer pairs are shown as JSON.

Configuration:
    HF_API_TOKEN: environment variable holding the Hugging Face API token.
        The token must NOT be committed to source control.
"""
import logging
import os
import re

import allennlp_models.tagging  # noqa: F401  (kept from the original; presumably registers the SRL model classes -- confirm)
import numpy as np  # noqa: F401  (unused here; kept because other code may rely on it)
import pandas as pd
import requests
import streamlit as st
from allennlp.predictors.predictor import Predictor
from transformers import pipeline  # noqa: F401  (unused here; kept from the original)

logger = logging.getLogger(__name__)

API_URL = "https://api-inference.huggingface.co/models/microsoft/prophetnet-large-uncased-squad-qg"
API_URL_evidence = "https://api-inference.huggingface.co/models/google/flan-t5-xxl"

# The token used to be hard-coded in this file; it is now read from the
# environment so that credentials never live in the repository.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "")
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
headers_evidence = dict(headers)

# Upper bound for a single API call so a stalled connection cannot hang the app.
REQUEST_TIMEOUT_SECONDS = 60

SRL_MODEL_PATH = "structured-prediction-srl-bert.tar.gz"

# SRL tag -> column name, applied in this order to each frame description.
_ROLE_LABELS = (
    ("ARG0", "who"),
    ("ARG1", "what"),
    ("ARGM-TMP", "when"),
    ("ARGM-LOC", "where"),
    ("ARGM-CAU", "why"),
)
# Column order of the frame returned by claim().
_ROLE_COLUMNS = ("who", "what", "why", "when", "where")
# Order in which gen_qq() walks the role columns.
_QUESTION_ROLE_ORDER = ("who", "when", "where", "what", "why")

# A comma with a digit on both sides, e.g. the separators in "1,000,000".
_NUMBER_COMMA_RE = re.compile(r"(?<=\d),(?=\d)")
_TRAILING_COMMA_RE = re.compile(r",\s*$")
_SPLIT_RE = re.compile(r'[^",]+|"[^"]*"')

# Everything that can go wrong while calling the API and reading
# ``response[0]["generated_text"]`` (error objects come back as dicts).
_API_ERRORS = (
    KeyError,
    IndexError,
    TypeError,
    AttributeError,
    ValueError,
    requests.RequestException,
)

# st.cache_resource exists from Streamlit 1.18; older releases expose the
# same behaviour as experimental_singleton.  Fall back to no caching.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or (lambda func: func)
)


def _post_json(url, auth_headers, payload):
    """POST *payload* as JSON to *url* and return the decoded JSON body."""
    response = requests.post(
        url,
        headers=auth_headers,
        json=payload,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    return response.json()


def query(payload):
    """Send *payload* to the question-generation endpoint (``API_URL``).

    Returns:
        The decoded JSON response, whatever shape the service sends back.
    """
    return _post_json(API_URL, headers, payload)


def query_evidence(payload):
    """Send *payload* to the evidence-QA endpoint (``API_URL_evidence``).

    Returns:
        The decoded JSON response, whatever shape the service sends back.
    """
    return _post_json(API_URL_evidence, headers_evidence, payload)


@_cache_resource
def _load_predictor():
    """Load the SRL predictor once instead of on every Streamlit rerun."""
    return Predictor.from_path(SRL_MODEL_PATH)


def _run_srl(sentence):
    """Run the SRL predictor on *sentence*; return ``None`` on IndexError.

    The original code treated an IndexError from the predictor as
    "no result", so that behaviour is kept.
    """
    try:
        return predictor.predict(sentence=sentence)
    except IndexError:
        return None


def _relabel_description(description):
    """Replace SRL tags in a frame description with 5W column names."""
    for tag, label in _ROLE_LABELS:
        description = description.replace(tag, label)
    return description


def _extract_role(description, role):
    """Return the text after the first ``"<role>: "`` up to the next ``]``.

    Returns ``None`` when the marker does not occur in *description*.
    """
    marker = f"{role}: "
    start = description.find(marker)
    if start == -1:
        return None
    start += len(marker)
    end = description.find("]", start)
    return description[start:] if end == -1 else description[start:end]


def _clean_role_text(spans):
    """Flatten a list of spans (or ``None``) into one comma-separated string.

    The list is stringified and then stripped of list syntax.  The exact
    replacement sequence is kept from the original because gen_qq() and
    split_ws() depend on the resulting format.
    """
    text = str(spans)
    text = text.replace(" 's", "'s").replace("s ’", "s’").replace(" - ", "-")
    text = text.replace("'", "")
    text = text.replace("[", "").replace("]", "")
    text = text.rstrip(",").lstrip(",")
    # Note: this also removes the literal word "None" inside a real span.
    text = text.replace("None,", "").replace("None", "")
    return _TRAILING_COMMA_RE.sub("", text)


def claim(text):
    """Extract 5W spans from a claim using semantic role labelling.

    Args:
        text: The claim sentence.

    Returns:
        A one-row DataFrame with string columns ``claim``, ``who``,
        ``what``, ``why``, ``when`` and ``where``.  Each role column holds
        the spans found across the claim's verb frames, comma-separated,
        or an empty/blank string when none were found.
    """
    # Drop thousands separators so "1,000,000" is not split on commas later.
    normalised = _NUMBER_COMMA_RE.sub("", text)

    srl_result = _run_srl(normalised)
    frames = srl_result["verbs"] if srl_result else []

    # Keyed by verb: a verb occurring twice keeps only its last description,
    # as in the original implementation.
    descriptions = {}
    for frame in frames:
        descriptions[frame["verb"]] = _relabel_description(frame["description"])

    row = {"claim": normalised.replace("'", "").replace("[", "").replace("]", "")}
    for role in _ROLE_COLUMNS:
        spans = [_extract_role(desc, role) for desc in descriptions.values()]
        row[role] = _clean_role_text(spans)

    return pd.DataFrame([row], columns=["claim", *_ROLE_COLUMNS])


def split_ws(input_list):
    """Split each string on commas, keeping double-quoted parts intact.

    Args:
        input_list: Iterable of strings.

    Returns:
        A flat list of the pieces with surrounding whitespace stripped.
    """
    pieces = []
    for item in input_list:
        pieces.extend(_SPLIT_RE.findall(item))
    return [piece.strip() for piece in pieces]


def gen_qq(df):
    """Generate one question per extracted 5W answer span.

    Args:
        df: DataFrame with a ``claim`` column, an ``evidence`` column and
            the five role columns produced by claim().

    Returns:
        Four equally long lists ``(claims, questions, answers, evidences)``.
        Spans whose question could not be generated are skipped entirely,
        so the lists always stay aligned.
    """
    claims, questions, answers, evidences = [], [], [], []

    for role in _QUESTION_ROLE_ORDER:
        for _, row in df.iterrows():
            role_text = row[role]
            if not isinstance(role_text, str) or not role_text.strip():
                continue
            claim_sentence = row["claim"]
            evidence = row["evidence"]

            for answer in split_ws([role_text]):
                if not answer:
                    continue
                fact = f"""{answer} [SEP] {claim_sentence}"""
                # NOTE(review): generation options are sent as top-level keys,
                # as in the original; confirm the API honours them there.
                try:
                    response = query(
                        {"inputs": fact, "num_beams": 5, "early_stopping": True}
                    )
                    question = response[0]["generated_text"].capitalize()
                except _API_ERRORS:
                    logger.warning(
                        "Question generation failed for answer %r", answer,
                        exc_info=True,
                    )
                    continue

                # Append only after the question exists so that the four
                # lists can never get out of step.
                claims.append(claim_sentence)
                questions.append(question)
                answers.append(answer)
                evidences.append(evidence)

    return claims, questions, answers, evidences


def qa_evidence(final_data):
    """Answer every generated question from the evidence text.

    Args:
        final_data: DataFrame with columns ``gen_question``, ``evidence``,
            ``actual_claim`` and ``actual_answer``.

    Returns:
        A pair of lists: ``(question, claim_answer)`` tuples and
        ``(question, evidence_answer)`` tuples.  ``evidence_answer`` is
        ``None`` when the API call fails or returns an unexpected body.
    """
    claim_pairs = []
    evidence_pairs = []

    for _, row in final_data.iterrows():
        question = row["gen_question"]
        prompt = f"question: {question} context: {row['evidence']}"
        try:
            response = query_evidence({"inputs": prompt, "truncation": True})
            evidence_answer = response[0]["generated_text"]
        except _API_ERRORS:
            logger.warning(
                "Evidence QA failed for question %r", question, exc_info=True
            )
            evidence_answer = None

        claim_pairs.append((question, row["actual_answer"]))
        evidence_pairs.append((question, evidence_answer))

    return claim_pairs, evidence_pairs


# ----------------------------------------------------------- page header
st.title('Welcome to :blue[FACTIFY - 5WQA]: ')
st.header('5W Aspect-based Fact Verification through Question Answering :blue[Web Demo]')
st.subheader('Here are a few steps to begin exploring and interacting with this demo.')
st.caption('First you need to input your claim and press :green[ctrl+enter].')
st.caption('Then you need to input your evidence and press :green[ctrl+enter].')
st.caption('After doing this two steps, you need to wait a minute to get the results.')
st.caption(':red[At times, you may encounter null/none outputs, which could be a result of a delay in loading the models through the API. If you experience this problem, kindly try again after a few minutes.]')
if not HF_API_TOKEN:
    st.warning(
        "HF_API_TOKEN is not set; Hugging Face API requests will be sent "
        "without authentication and may be rejected."
    )

# ----------------------------------------------------------- user input
claim_text = st.text_area("Enter your claim:")
evidence_text = st.text_area("Enter your evidence:")

predictor = _load_predictor()

# ----------------------------------------------------------- pipeline
if claim_text and evidence_text:
    df = claim(claim_text)
    df["evidence"] = evidence_text

    actual_claim, gen_question, actual_answer, evidence = gen_qq(df)
    final_data = pd.DataFrame(
        {
            "actual_claim": actual_claim,
            "gen_question": gen_question,
            "actual_answer": actual_answer,
            "evidence": evidence,
        }
    )

    a, b = qa_evidence(final_data)
    st.json(
        {
            'QA pair from claim': [
                {"Question": qu, "Answer": an} for qu, an in a
            ],
            'QA pair from evidence': [
                {"Question": qu, "Answer": an} for qu, an in b
            ],
        }
    )