Spaces:

ashishraics
/

NLP

Runtime error

App Files Files Community

ashishraics committed on May 15, 2022

Commit

d77ac81

•

1 Parent(s): d670ff4

structure using config.yaml

Browse files

Files changed (3) hide show

app.py +8 -9
sentiment_clf_helper.py +1 -2
zeroshot_clf_helper.py +33 -11

app.py CHANGED Viewed

@@ -156,12 +156,11 @@ def sentiment_task_selected(task,
                             sent_onnx_mdl_name=sent_onnx_mdl_name,
                             sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
     ##model & tokenizer initialization for normal sentiment classification
-    # model_sentiment=AutoModelForSequenceClassification.from_pretrained(sent_chkpt)
-    # tokenizer_sentiment=AutoTokenizer.from_pretrained(sent_chkpt)
     tokenizer_sentiment = AutoTokenizer.from_pretrained(sent_mdl_dir)
     # # create onnx model for sentiment classification but once created in your local app comment this out
-    # create_onnx_model_sentiment(_model=model_sentiment, _tokenizer=tokenizer_sentiment)
     #create inference session
     sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}",sess_options=session_options_ort)
@@ -191,7 +190,7 @@ def zs_nli_task_selected(task,
     tokenizer_zs = AutoTokenizer.from_pretrained(zs_mdl_dir)
     ## create onnx model for zeroshot but once created locally comment it out.
-    #create_onnx_model_zs_nli()
     #create inference session from onnx model
     zs_session = ort.InferenceSession(f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}",sess_options=session_options_ort)
@@ -203,7 +202,7 @@ def zs_nli_task_selected(task,
 ############### Pre-Download & instantiate objects for Zero shot clf NLI *********************** START **********************
 ## create model/token dir for zeroshot clf -- already created so not required
-# create_model_dir(chkpt=zs_mlm_chkpt, model_dir=zs_mlm_mdl_dir, task_type='mlm')
 @st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
 def zs_mlm_task_selected(task,
@@ -218,9 +217,7 @@ def zs_mlm_task_selected(task,
     tokenizer_zs_mlm = AutoTokenizer.from_pretrained(zs_mlm_mdl_dir)
     # # create onnx model for zeroshot but once created locally comment it out.
-    # create_onnx_model_zs_mlm(_model=model_zs_mlm,
-    #                          _tokenizer=tokenizer_zs_mlm,
-    #                          zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir)
     # create inference session from onnx model
     zs_session_mlm = ort.InferenceSession(f"{zs_mlm_onnx_mdl_dir}/{zs_mlm_onnx_mdl_name}", sess_options=session_options_ort)
@@ -293,7 +290,7 @@ if select_task=='Zero Shot Classification':
     t2 = time.time()
     st.write(f"Total time to load MLM Model is {(t2-t1)*1000:.1f} ms")
-    st.subheader("Zero Shot Classification using NLI")
     input_texts = st.text_input(label="Input text to classify into topics")
     input_lables = st.text_input(label="Enter labels separated by commas")
     input_hypothesis = st.text_input(label="Enter your hypothesis",value="This is an example of")
@@ -334,6 +331,8 @@ if select_task=='Zero Shot Classification':
                                                          )
         end=time.time()
         st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
         fig = px.bar(x='Probability',
                      y='Labels',

                             sent_onnx_mdl_name=sent_onnx_mdl_name,
                             sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
     ##model & tokenizer initialization for normal sentiment classification
+    model_sentiment=AutoModelForSequenceClassification.from_pretrained(sent_mdl_dir)
     tokenizer_sentiment = AutoTokenizer.from_pretrained(sent_mdl_dir)
     # # create onnx model for sentiment classification but once created in your local app comment this out
+    create_onnx_model_sentiment(_model=model_sentiment, _tokenizer=tokenizer_sentiment)
     #create inference session
     sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}",sess_options=session_options_ort)
     tokenizer_zs = AutoTokenizer.from_pretrained(zs_mdl_dir)
     ## create onnx model for zeroshot but once created locally comment it out.
+    create_onnx_model_zs_nli(zs_chkpt=zs_chkpt,zs_onnx_mdl_dir=zs_onnx_mdl_dir)
     #create inference session from onnx model
     zs_session = ort.InferenceSession(f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}",sess_options=session_options_ort)
 ############### Pre-Download & instantiate objects for Zero shot clf NLI *********************** START **********************
 ## create model/token dir for zeroshot clf -- already created so not required
+create_model_dir(chkpt=zs_mlm_chkpt, model_dir=zs_mlm_mdl_dir, task_type='mlm')
 @st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
 def zs_mlm_task_selected(task,
     tokenizer_zs_mlm = AutoTokenizer.from_pretrained(zs_mlm_mdl_dir)
     # # create onnx model for zeroshot but once created locally comment it out.
+    create_onnx_model_zs_mlm(zs_mlm_chkpt=zs_mlm_chkpt,zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir)
     # create inference session from onnx model
     zs_session_mlm = ort.InferenceSession(f"{zs_mlm_onnx_mdl_dir}/{zs_mlm_onnx_mdl_name}", sess_options=session_options_ort)
     t2 = time.time()
     st.write(f"Total time to load MLM Model is {(t2-t1)*1000:.1f} ms")
+    st.subheader("Zero Shot Classification using NLI & MLM")
     input_texts = st.text_input(label="Input text to classify into topics")
     input_lables = st.text_input(label="Enter labels separated by commas")
     input_hypothesis = st.text_input(label="Enter your hypothesis",value="This is an example of")
                                                          )
         end=time.time()
         st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
+        st.write(f"Currently hypothesis and premise have *single token_type_ids*  ."
+                 f"Once updated for different *token_type_ids* expect the model performance to increase.")
         fig = px.bar(x='Probability',
                      y='Labels',

sentiment_clf_helper.py CHANGED Viewed

@@ -5,9 +5,8 @@ import transformers.convert_graph_to_onnx as onnx_convert
 from pathlib import Path
 import os
 import torch
 import yaml
 def read_yaml(file_path):
     """Load a YAML file and return its parsed content.

     Args:
         file_path: Path of the YAML file to read.

     Returns:
         The object produced by ``yaml.safe_load`` for the file's content.
     """
     # Explicit UTF-8 so parsing does not depend on the platform's locale encoding.
     with open(file_path, "r", encoding="utf-8") as f:
         return yaml.safe_load(f)

 from pathlib import Path
 import os
 import torch
 import yaml
 def read_yaml(file_path):
     """Load a YAML file and return its parsed content.

     Args:
         file_path: Path of the YAML file to read.

     Returns:
         The object produced by ``yaml.safe_load`` for the file's content.
     """
     # Explicit UTF-8 so parsing does not depend on the platform's locale encoding.
     with open(file_path, "r", encoding="utf-8") as f:
         return yaml.safe_load(f)

zeroshot_clf_helper.py CHANGED Viewed

@@ -26,6 +26,10 @@ zs_mlm_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_mdl_dir']
 zs_mlm_onnx_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_dir']
 zs_mlm_onnx_mdl_name=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_name']
 def zero_shot_classification(premise: str, labels: str, model, tokenizer):
     """
@@ -67,12 +71,7 @@ def zero_shot_classification(premise: str, labels: str, model, tokenizer):
     return df
-##example
-# zero_shot_classification(premise='Tiny worms and breath analyzers could screen for disease while it’s early and treatable',
-#                          labels='science, sports, museum')
-def create_onnx_model_zs_nli(zs_onnx_mdl_dir=zs_onnx_mdl_dir):
     """
     Args:
@@ -86,7 +85,7 @@ def create_onnx_model_zs_nli(zs_onnx_mdl_dir=zs_onnx_mdl_dir):
     if not os.path.exists(zs_onnx_mdl_dir):
         try:
             subprocess.run(['python3', '-m', 'transformers.onnx',
-                            '--model=valhalla/distilbart-mnli-12-1',
                             '--feature=sequence-classification',
                             '--atol=1e-3',
                             zs_onnx_mdl_dir])
@@ -148,8 +147,7 @@ def zero_shot_classification_nli_onnx(premise,labels,_session,_tokenizer,hypothe
     return df
-def create_onnx_model_zs_mlm(_model, _tokenizer,zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir):
     """
     Args:
@@ -196,8 +194,31 @@ def zero_shot_classification_fillmask_onnx(premise,hypothesis,labels,_session,_t
     final_input= f"{premise}.{hypothesis} [MASK]" #this can change depending on chkpt, this is for bert-base-uncased chkpt
-    _inputs=_tokenizer(final_input,padding=True, truncation=True,
-                      return_tensors="pt")
     input_feed={
         'input_ids': np.array(_inputs['input_ids']),
@@ -205,6 +226,7 @@ def zero_shot_classification_fillmask_onnx(premise,hypothesis,labels,_session,_t
         'attention_mask': np.array(_inputs['attention_mask'])
     }
     output=_session.run(output_names=['logits'],input_feed=dict(input_feed))[0]
     mask_token_index = np.argwhere(_inputs["input_ids"] == _tokenizer.mask_token_id)[1,0]

 zs_mlm_onnx_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_dir']
 zs_mlm_onnx_mdl_name=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_name']
+##example
+# zero_shot_classification(premise='Tiny worms and breath analyzers could screen for disease while it’s early and treatable',
+#                          labels='science, sports, museum')
 def zero_shot_classification(premise: str, labels: str, model, tokenizer):
     """
     return df
+def create_onnx_model_zs_nli(zs_chkpt,zs_onnx_mdl_dir):
     """
     Args:
     if not os.path.exists(zs_onnx_mdl_dir):
         try:
             subprocess.run(['python3', '-m', 'transformers.onnx',
+                            f'--model={zs_chkpt}',
                             '--feature=sequence-classification',
                             '--atol=1e-3',
                             zs_onnx_mdl_dir])
     return df
+def create_onnx_model_zs_mlm(zs_mlm_chkpt,zs_mlm_onnx_mdl_dir):
     """
     Args:
     final_input= f"{premise}.{hypothesis} [MASK]" #this can change depending on chkpt, this is for bert-base-uncased chkpt
+    _inputs=_tokenizer(final_input,padding=True, truncation=True,return_tensors="pt")
+    ## the alternative below (separate token_type_ids for premise and hypothesis) lowers the performance, so it is left disabled
+    # premise_token_ids=_tokenizer.encode(premise,add_special_tokens=False)
+    # hypothesis_token_ids=_tokenizer.encode(hypothesis,add_special_tokens=False)
+    #
+    # #creating inputs ids
+    # input_ids=[_tokenizer.cls_token_id]+premise_token_ids+[_tokenizer.sep_token_id]+hypothesis_token_ids+[_tokenizer.sep_token_id]
+    # input_ids=np.array(input_ids)
+    #
+    # #creating token type ids
+    # premise_len=len(premise_token_ids)
+    # hypothesis_len=len(hypothesis_token_ids)
+    # token_type_ids=np.array([0]*(premise_len+2)+[1]*(hypothesis_len+1))
+    #
+    # #creating attention mask
+    # attention_mask=np.array([1]*(premise_len+hypothesis_len+3))
+    #
+    # input_feed={
+    #     'input_ids': np.expand_dims(input_ids,axis=0),
+    #     'token_type_ids': np.expand_dims(token_type_ids,0),
+    #     'attention_mask': np.expand_dims(attention_mask,0)
+    # }
     input_feed={
         'input_ids': np.array(_inputs['input_ids']),
         'attention_mask': np.array(_inputs['attention_mask'])
     }
     output=_session.run(output_names=['logits'],input_feed=dict(input_feed))[0]
     mask_token_index = np.argwhere(_inputs["input_ids"] == _tokenizer.mask_token_id)[1,0]