ashishraics committed on
Commit
d77ac81
1 Parent(s): d670ff4

structure using config.yaml

Browse files
Files changed (3) hide show
  1. app.py +8 -9
  2. sentiment_clf_helper.py +1 -2
  3. zeroshot_clf_helper.py +33 -11
app.py CHANGED
@@ -156,12 +156,11 @@ def sentiment_task_selected(task,
156
  sent_onnx_mdl_name=sent_onnx_mdl_name,
157
  sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
158
  ##model & tokenizer initialization for normal sentiment classification
159
- # model_sentiment=AutoModelForSequenceClassification.from_pretrained(sent_chkpt)
160
- # tokenizer_sentiment=AutoTokenizer.from_pretrained(sent_chkpt)
161
  tokenizer_sentiment = AutoTokenizer.from_pretrained(sent_mdl_dir)
162
 
163
  # # create onnx model for sentiment classification but once created in your local app comment this out
164
- # create_onnx_model_sentiment(_model=model_sentiment, _tokenizer=tokenizer_sentiment)
165
 
166
  #create inference session
167
  sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}",sess_options=session_options_ort)
@@ -191,7 +190,7 @@ def zs_nli_task_selected(task,
191
  tokenizer_zs = AutoTokenizer.from_pretrained(zs_mdl_dir)
192
 
193
  ## create onnx model for zeroshot but once created locally comment it out.
194
- #create_onnx_model_zs_nli()
195
 
196
  #create inference session from onnx model
197
  zs_session = ort.InferenceSession(f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}",sess_options=session_options_ort)
@@ -203,7 +202,7 @@ def zs_nli_task_selected(task,
203
 
204
  ############### Pre-Download & instantiate objects for Zero shot clf NLI *********************** START **********************
205
  ## create model/token dir for zeroshot clf -- already created so not required
206
- # create_model_dir(chkpt=zs_mlm_chkpt, model_dir=zs_mlm_mdl_dir, task_type='mlm')
207
 
208
  @st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
209
  def zs_mlm_task_selected(task,
@@ -218,9 +217,7 @@ def zs_mlm_task_selected(task,
218
  tokenizer_zs_mlm = AutoTokenizer.from_pretrained(zs_mlm_mdl_dir)
219
 
220
  # # create onnx model for zeroshot but once created locally comment it out.
221
- # create_onnx_model_zs_mlm(_model=model_zs_mlm,
222
- # _tokenizer=tokenizer_zs_mlm,
223
- # zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir)
224
 
225
  # create inference session from onnx model
226
  zs_session_mlm = ort.InferenceSession(f"{zs_mlm_onnx_mdl_dir}/{zs_mlm_onnx_mdl_name}", sess_options=session_options_ort)
@@ -293,7 +290,7 @@ if select_task=='Zero Shot Classification':
293
  t2 = time.time()
294
  st.write(f"Total time to load MLM Model is {(t2-t1)*1000:.1f} ms")
295
 
296
- st.subheader("Zero Shot Classification using NLI")
297
  input_texts = st.text_input(label="Input text to classify into topics")
298
  input_lables = st.text_input(label="Enter labels separated by commas")
299
  input_hypothesis = st.text_input(label="Enter your hypothesis",value="This is an example of")
@@ -334,6 +331,8 @@ if select_task=='Zero Shot Classification':
334
  )
335
  end=time.time()
336
  st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
 
 
337
 
338
  fig = px.bar(x='Probability',
339
  y='Labels',
 
156
  sent_onnx_mdl_name=sent_onnx_mdl_name,
157
  sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
158
  ##model & tokenizer initialization for normal sentiment classification
159
+ model_sentiment=AutoModelForSequenceClassification.from_pretrained(sent_mdl_dir)
 
160
  tokenizer_sentiment = AutoTokenizer.from_pretrained(sent_mdl_dir)
161
 
162
  # # create onnx model for sentiment classification but once created in your local app comment this out
163
+ create_onnx_model_sentiment(_model=model_sentiment, _tokenizer=tokenizer_sentiment)
164
 
165
  #create inference session
166
  sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}",sess_options=session_options_ort)
 
190
  tokenizer_zs = AutoTokenizer.from_pretrained(zs_mdl_dir)
191
 
192
  ## create onnx model for zeroshot but once created locally comment it out.
193
+ create_onnx_model_zs_nli(zs_chkpt=zs_chkpt,zs_onnx_mdl_dir=zs_onnx_mdl_dir)
194
 
195
  #create inference session from onnx model
196
  zs_session = ort.InferenceSession(f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}",sess_options=session_options_ort)
 
202
 
203
  ############### Pre-Download & instantiate objects for Zero shot clf NLI *********************** START **********************
204
  ## create model/token dir for zeroshot clf -- already created so not required
205
+ create_model_dir(chkpt=zs_mlm_chkpt, model_dir=zs_mlm_mdl_dir, task_type='mlm')
206
 
207
  @st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
208
  def zs_mlm_task_selected(task,
 
217
  tokenizer_zs_mlm = AutoTokenizer.from_pretrained(zs_mlm_mdl_dir)
218
 
219
  # # create onnx model for zeroshot but once created locally comment it out.
220
+ create_onnx_model_zs_mlm(zs_mlm_chkpt=zs_mlm_chkpt,zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir)
 
 
221
 
222
  # create inference session from onnx model
223
  zs_session_mlm = ort.InferenceSession(f"{zs_mlm_onnx_mdl_dir}/{zs_mlm_onnx_mdl_name}", sess_options=session_options_ort)
 
290
  t2 = time.time()
291
  st.write(f"Total time to load MLM Model is {(t2-t1)*1000:.1f} ms")
292
 
293
+ st.subheader("Zero Shot Classification using NLI & MLM")
294
  input_texts = st.text_input(label="Input text to classify into topics")
295
  input_lables = st.text_input(label="Enter labels separated by commas")
296
  input_hypothesis = st.text_input(label="Enter your hypothesis",value="This is an example of")
 
331
  )
332
  end=time.time()
333
  st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
334
+ st.write(f"Currently hypothesis and premise have *single token_type_ids* ."
335
+ f"Once updated for different *token_type_ids* expect the model performance to increase.")
336
 
337
  fig = px.bar(x='Probability',
338
  y='Labels',
sentiment_clf_helper.py CHANGED
@@ -5,9 +5,8 @@ import transformers.convert_graph_to_onnx as onnx_convert
5
  from pathlib import Path
6
  import os
7
  import torch
8
-
9
-
10
  import yaml
 
11
  def read_yaml(file_path):
12
  with open(file_path, "r") as f:
13
  return yaml.safe_load(f)
 
5
  from pathlib import Path
6
  import os
7
  import torch
 
 
8
  import yaml
9
+
10
  def read_yaml(file_path):
11
  with open(file_path, "r") as f:
12
  return yaml.safe_load(f)
zeroshot_clf_helper.py CHANGED
@@ -26,6 +26,10 @@ zs_mlm_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_mdl_dir']
26
  zs_mlm_onnx_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_dir']
27
  zs_mlm_onnx_mdl_name=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_name']
28
 
 
 
 
 
29
 
30
  def zero_shot_classification(premise: str, labels: str, model, tokenizer):
31
  """
@@ -67,12 +71,7 @@ def zero_shot_classification(premise: str, labels: str, model, tokenizer):
67
 
68
  return df
69
 
70
- ##example
71
- # zero_shot_classification(premise='Tiny worms and breath analyzers could screen for disease while it’s early and treatable',
72
- # labels='science, sports, museum')
73
-
74
-
75
- def create_onnx_model_zs_nli(zs_onnx_mdl_dir=zs_onnx_mdl_dir):
76
  """
77
 
78
  Args:
@@ -86,7 +85,7 @@ def create_onnx_model_zs_nli(zs_onnx_mdl_dir=zs_onnx_mdl_dir):
86
  if not os.path.exists(zs_onnx_mdl_dir):
87
  try:
88
  subprocess.run(['python3', '-m', 'transformers.onnx',
89
- '--model=valhalla/distilbart-mnli-12-1',
90
  '--feature=sequence-classification',
91
  '--atol=1e-3',
92
  zs_onnx_mdl_dir])
@@ -148,8 +147,7 @@ def zero_shot_classification_nli_onnx(premise,labels,_session,_tokenizer,hypothe
148
 
149
  return df
150
 
151
-
152
- def create_onnx_model_zs_mlm(_model, _tokenizer,zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir):
153
  """
154
 
155
  Args:
@@ -196,8 +194,31 @@ def zero_shot_classification_fillmask_onnx(premise,hypothesis,labels,_session,_t
196
 
197
  final_input= f"{premise}.{hypothesis} [MASK]" #this can change depending on chkpt, this is for bert-base-uncased chkpt
198
 
199
- _inputs=_tokenizer(final_input,padding=True, truncation=True,
200
- return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  input_feed={
203
  'input_ids': np.array(_inputs['input_ids']),
@@ -205,6 +226,7 @@ def zero_shot_classification_fillmask_onnx(premise,hypothesis,labels,_session,_t
205
  'attention_mask': np.array(_inputs['attention_mask'])
206
  }
207
 
 
208
  output=_session.run(output_names=['logits'],input_feed=dict(input_feed))[0]
209
 
210
  mask_token_index = np.argwhere(_inputs["input_ids"] == _tokenizer.mask_token_id)[1,0]
 
26
  zs_mlm_onnx_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_dir']
27
  zs_mlm_onnx_mdl_name=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_name']
28
 
29
+ ##example
30
+ # zero_shot_classification(premise='Tiny worms and breath analyzers could screen for disease while it’s early and treatable',
31
+ # labels='science, sports, museum')
32
+
33
 
34
  def zero_shot_classification(premise: str, labels: str, model, tokenizer):
35
  """
 
71
 
72
  return df
73
 
74
+ def create_onnx_model_zs_nli(zs_chkpt,zs_onnx_mdl_dir):
 
 
 
 
 
75
  """
76
 
77
  Args:
 
85
  if not os.path.exists(zs_onnx_mdl_dir):
86
  try:
87
  subprocess.run(['python3', '-m', 'transformers.onnx',
88
+ f'--model={zs_chkpt}',
89
  '--feature=sequence-classification',
90
  '--atol=1e-3',
91
  zs_onnx_mdl_dir])
 
147
 
148
  return df
149
 
150
+ def create_onnx_model_zs_mlm(zs_mlm_chkpt,zs_mlm_onnx_mdl_dir):
 
151
  """
152
 
153
  Args:
 
194
 
195
  final_input= f"{premise}.{hypothesis} [MASK]" #this can change depending on chkpt, this is for bert-base-uncased chkpt
196
 
197
+ _inputs=_tokenizer(final_input,padding=True, truncation=True,return_tensors="pt")
198
+
199
+
200
+ ## lowers the performance
201
+ # premise_token_ids=_tokenizer.encode(premise,add_special_tokens=False)
202
+ # hypothesis_token_ids=_tokenizer.encode(hypothesis,add_special_tokens=False)
203
+ #
204
+ # #creating inputs ids
205
+ # input_ids=[_tokenizer.cls_token_id]+premise_token_ids+[_tokenizer.sep_token_id]+hypothesis_token_ids+[_tokenizer.sep_token_id]
206
+ # input_ids=np.array(input_ids)
207
+ #
208
+ # #creating token type ids
209
+ # premise_len=len(premise_token_ids)
210
+ # hypothesis_len=len(hypothesis_token_ids)
211
+ # token_type_ids=np.array([0]*(premise_len+2)+[1]*(hypothesis_len+1))
212
+ #
213
+ # #creating attention mask
214
+ # attention_mask=np.array([1]*(premise_len+hypothesis_len+3))
215
+ #
216
+ # input_feed={
217
+ # 'input_ids': np.expand_dims(input_ids,axis=0),
218
+ # 'token_type_ids': np.expand_dims(token_type_ids,0),
219
+ # 'attention_mask': np.expand_dims(attention_mask,0)
220
+ # }
221
+
222
 
223
  input_feed={
224
  'input_ids': np.array(_inputs['input_ids']),
 
226
  'attention_mask': np.array(_inputs['attention_mask'])
227
  }
228
 
229
+
230
  output=_session.run(output_names=['logits'],input_feed=dict(input_feed))[0]
231
 
232
  mask_token_index = np.argwhere(_inputs["input_ids"] == _tokenizer.mask_token_id)[1,0]