Spaces:

team-ai-law-assistant
/

CUAD

Runtime error

App Files Files Community

muhtasham commited on Nov 22, 2021

Commit

47dd29e

•

1 Parent(s): 13cf51e

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -55

app.py CHANGED Viewed

@@ -21,58 +21,6 @@ st.sidebar.write("Git Hub: https://github.com/TheAtticusProject/cuad")
 st.sidebar.write("CUAD Dataset: https://huggingface.co/datasets/cuad")
 @st.cache(allow_output_mutation=True)
-def load_model():
-    model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
-    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint , use_fast=False)
-    return model, tokenizer
-@st.cache(allow_output_mutation=True)
-def load_questions():
-	with open('test.json') as json_file:
-		data = json.load(json_file)
-	questions = []
-	for i, q in enumerate(data['data'][0]['paragraphs'][0]['qas']):
-		question = data['data'][0]['paragraphs'][0]['qas'][i]['question']
-		questions.append(question)
-	return questions
-@st.cache(allow_output_mutation=True)
-def load_contracts():
-	with open('test.json') as json_file:
-		data = json.load(json_file)
-	contracts = []
-	for i, q in enumerate(data['data']):
-		contract = ' '.join(data['data'][i]['paragraphs'][0]['context'].split())
-		contracts.append(contract)
-	return contracts
-model, tokenizer = load_model()
-questions = load_questions()
-contracts = load_contracts()
-contract = contracts[0]
-st.header("Contract Understanding Atticus Dataset (CUAD) Demo")
-st.write("Based on https://github.com/marshmellow77/cuad-demo")
-selected_question = st.selectbox('Choose one of the 41 queries from the CUAD dataset:', questions)
-question_set = [questions[0], selected_question]
-contract_type = st.radio("Select Contract", ("Sample Contract", "New Contract"))
-if contract_type == "Sample Contract":
-	sample_contract_num = st.slider("Select Sample Contract #")
-	contract = contracts[sample_contract_num]
-	with st.expander(f"Sample Contract #{sample_contract_num}"):
-		st.write(contract)
-else:
-	contract = st.text_area("Input New Contract", "", height=256)
-Run_Button = st.button("Run", key=None)
-if Run_Button == True and not len(contract)==0 and not len(question_set)==0:
-	predictions = run_prediction(question_set, contract, 'akdeniz27/roberta-base-cuad')
-	for i, p in enumerate(predictions):
-		if i != 0: st.write(f"Question: {question_set[int(p)]}\n\nAnswer: {predictions[p]}\n\n")
 def run_prediction(question_texts, context_text, model_path):
     max_seq_length = 512
     doc_stride = 256
@@ -81,8 +29,7 @@ def run_prediction(question_texts, context_text, model_path):
     max_answer_length = 512
     do_lower_case = False
     null_score_diff_threshold = 0.0
-        def to_list(tensor):
         return tensor.detach().cpu().tolist()
     config_class, model_class, tokenizer_class = (
         AutoConfig, AutoModelForQuestionAnswering, AutoTokenizer)
@@ -151,4 +98,56 @@ def run_prediction(question_texts, context_text, model_path):
         null_score_diff_threshold=null_score_diff_threshold,
         tokenizer=tokenizer
     )
-    return final_predictions

 st.sidebar.write("CUAD Dataset: https://huggingface.co/datasets/cuad")
 @st.cache(allow_output_mutation=True)
 def run_prediction(question_texts, context_text, model_path):
     max_seq_length = 512
     doc_stride = 256
     max_answer_length = 512
     do_lower_case = False
     null_score_diff_threshold = 0.0
+    def to_list(tensor):
         return tensor.detach().cpu().tolist()
     config_class, model_class, tokenizer_class = (
         AutoConfig, AutoModelForQuestionAnswering, AutoTokenizer)
         null_score_diff_threshold=null_score_diff_threshold,
         tokenizer=tokenizer
     )
+    return final_predictions
+@st.cache(allow_output_mutation=True)
+def load_model():
+    model = AutoModelForQuestionAnswering.from_pretrained(model_checkpoint)
+    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint , use_fast=False)
+    return model, tokenizer
+@st.cache(allow_output_mutation=True)
+def load_questions():
+	with open('test.json') as json_file:
+		data = json.load(json_file)
+	questions = []
+	for i, q in enumerate(data['data'][0]['paragraphs'][0]['qas']):
+		question = data['data'][0]['paragraphs'][0]['qas'][i]['question']
+		questions.append(question)
+	return questions
+@st.cache(allow_output_mutation=True)
+def load_contracts():
+	with open('test.json') as json_file:
+		data = json.load(json_file)
+	contracts = []
+	for i, q in enumerate(data['data']):
+		contract = ' '.join(data['data'][i]['paragraphs'][0]['context'].split())
+		contracts.append(contract)
+	return contracts
+model, tokenizer = load_model()
+questions = load_questions()
+contracts = load_contracts()
+contract = contracts[0]
+st.header("Contract Understanding Atticus Dataset (CUAD) Demo")
+st.write("Based on https://github.com/marshmellow77/cuad-demo")
+selected_question = st.selectbox('Choose one of the 41 queries from the CUAD dataset:', questions)
+question_set = [questions[0], selected_question]
+contract_type = st.radio("Select Contract", ("Sample Contract", "New Contract"))
+if contract_type == "Sample Contract":
+	sample_contract_num = st.slider("Select Sample Contract #")
+	contract = contracts[sample_contract_num]
+	with st.expander(f"Sample Contract #{sample_contract_num}"):
+		st.write(contract)
+else:
+	contract = st.text_area("Input New Contract", "", height=256)
+Run_Button = st.button("Run", key=None)
+if Run_Button == True and not len(contract)==0 and not len(question_set)==0:
+	predictions = run_prediction(question_set, contract, 'akdeniz27/roberta-base-cuad')
+	for i, p in enumerate(predictions):
+		if i != 0: st.write(f"Question: {question_set[int(p)]}\n\nAnswer: {predictions[p]}\n\n")