Spaces:
Runtime error
Runtime error
ashishraics
committed on
Commit
•
d77ac81
1
Parent(s):
d670ff4
structure using config.yaml
Browse files
- app.py +8 -9
- sentiment_clf_helper.py +1 -2
- zeroshot_clf_helper.py +33 -11
app.py
CHANGED
@@ -156,12 +156,11 @@ def sentiment_task_selected(task,
|
|
156 |
sent_onnx_mdl_name=sent_onnx_mdl_name,
|
157 |
sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
|
158 |
##model & tokenizer initialization for normal sentiment classification
|
159 |
-
|
160 |
-
# tokenizer_sentiment=AutoTokenizer.from_pretrained(sent_chkpt)
|
161 |
tokenizer_sentiment = AutoTokenizer.from_pretrained(sent_mdl_dir)
|
162 |
|
163 |
# # create onnx model for sentiment classification but once created in your local app comment this out
|
164 |
-
|
165 |
|
166 |
#create inference session
|
167 |
sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}",sess_options=session_options_ort)
|
@@ -191,7 +190,7 @@ def zs_nli_task_selected(task,
|
|
191 |
tokenizer_zs = AutoTokenizer.from_pretrained(zs_mdl_dir)
|
192 |
|
193 |
## create onnx model for zeroshot but once created locally comment it out.
|
194 |
-
|
195 |
|
196 |
#create inference session from onnx model
|
197 |
zs_session = ort.InferenceSession(f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}",sess_options=session_options_ort)
|
@@ -203,7 +202,7 @@ def zs_nli_task_selected(task,
|
|
203 |
|
204 |
############### Pre-Download & instantiate objects for Zero shot clf NLI *********************** START **********************
|
205 |
## create model/token dir for zeroshot clf -- already created so not required
|
206 |
-
|
207 |
|
208 |
@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
|
209 |
def zs_mlm_task_selected(task,
|
@@ -218,9 +217,7 @@ def zs_mlm_task_selected(task,
|
|
218 |
tokenizer_zs_mlm = AutoTokenizer.from_pretrained(zs_mlm_mdl_dir)
|
219 |
|
220 |
# # create onnx model for zeroshot but once created locally comment it out.
|
221 |
-
|
222 |
-
# _tokenizer=tokenizer_zs_mlm,
|
223 |
-
# zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir)
|
224 |
|
225 |
# create inference session from onnx model
|
226 |
zs_session_mlm = ort.InferenceSession(f"{zs_mlm_onnx_mdl_dir}/{zs_mlm_onnx_mdl_name}", sess_options=session_options_ort)
|
@@ -293,7 +290,7 @@ if select_task=='Zero Shot Classification':
|
|
293 |
t2 = time.time()
|
294 |
st.write(f"Total time to load MLM Model is {(t2-t1)*1000:.1f} ms")
|
295 |
|
296 |
-
st.subheader("Zero Shot Classification using NLI")
|
297 |
input_texts = st.text_input(label="Input text to classify into topics")
|
298 |
input_lables = st.text_input(label="Enter labels separated by commas")
|
299 |
input_hypothesis = st.text_input(label="Enter your hypothesis",value="This is an example of")
|
@@ -334,6 +331,8 @@ if select_task=='Zero Shot Classification':
|
|
334 |
)
|
335 |
end=time.time()
|
336 |
st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
|
|
|
|
|
337 |
|
338 |
fig = px.bar(x='Probability',
|
339 |
y='Labels',
|
|
|
156 |
sent_onnx_mdl_name=sent_onnx_mdl_name,
|
157 |
sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
|
158 |
##model & tokenizer initialization for normal sentiment classification
|
159 |
+
model_sentiment=AutoModelForSequenceClassification.from_pretrained(sent_mdl_dir)
|
|
|
160 |
tokenizer_sentiment = AutoTokenizer.from_pretrained(sent_mdl_dir)
|
161 |
|
162 |
# # create onnx model for sentiment classification but once created in your local app comment this out
|
163 |
+
create_onnx_model_sentiment(_model=model_sentiment, _tokenizer=tokenizer_sentiment)
|
164 |
|
165 |
#create inference session
|
166 |
sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}",sess_options=session_options_ort)
|
|
|
190 |
tokenizer_zs = AutoTokenizer.from_pretrained(zs_mdl_dir)
|
191 |
|
192 |
## create onnx model for zeroshot but once created locally comment it out.
|
193 |
+
create_onnx_model_zs_nli(zs_chkpt=zs_chkpt,zs_onnx_mdl_dir=zs_onnx_mdl_dir)
|
194 |
|
195 |
#create inference session from onnx model
|
196 |
zs_session = ort.InferenceSession(f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}",sess_options=session_options_ort)
|
|
|
202 |
|
203 |
############### Pre-Download & instantiate objects for Zero shot clf NLI *********************** START **********************
|
204 |
## create model/token dir for zeroshot clf -- already created so not required
|
205 |
+
create_model_dir(chkpt=zs_mlm_chkpt, model_dir=zs_mlm_mdl_dir, task_type='mlm')
|
206 |
|
207 |
@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
|
208 |
def zs_mlm_task_selected(task,
|
|
|
217 |
tokenizer_zs_mlm = AutoTokenizer.from_pretrained(zs_mlm_mdl_dir)
|
218 |
|
219 |
# # create onnx model for zeroshot but once created locally comment it out.
|
220 |
+
create_onnx_model_zs_mlm(zs_mlm_chkpt=zs_mlm_chkpt,zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir)
|
|
|
|
|
221 |
|
222 |
# create inference session from onnx model
|
223 |
zs_session_mlm = ort.InferenceSession(f"{zs_mlm_onnx_mdl_dir}/{zs_mlm_onnx_mdl_name}", sess_options=session_options_ort)
|
|
|
290 |
t2 = time.time()
|
291 |
st.write(f"Total time to load MLM Model is {(t2-t1)*1000:.1f} ms")
|
292 |
|
293 |
+
st.subheader("Zero Shot Classification using NLI & MLM")
|
294 |
input_texts = st.text_input(label="Input text to classify into topics")
|
295 |
input_lables = st.text_input(label="Enter labels separated by commas")
|
296 |
input_hypothesis = st.text_input(label="Enter your hypothesis",value="This is an example of")
|
|
|
331 |
)
|
332 |
end=time.time()
|
333 |
st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
|
334 |
+
st.write(f"Currently hypothesis and premise have *single token_type_ids* ."
|
335 |
+
f"Once updated for different *token_type_ids* expect the model performance to increase.")
|
336 |
|
337 |
fig = px.bar(x='Probability',
|
338 |
y='Labels',
|
sentiment_clf_helper.py
CHANGED
@@ -5,9 +5,8 @@ import transformers.convert_graph_to_onnx as onnx_convert
|
|
5 |
from pathlib import Path
|
6 |
import os
|
7 |
import torch
|
8 |
-
|
9 |
-
|
10 |
import yaml
|
|
|
11 |
def read_yaml(file_path):
|
12 |
with open(file_path, "r") as f:
|
13 |
return yaml.safe_load(f)
|
|
|
5 |
from pathlib import Path
|
6 |
import os
|
7 |
import torch
|
|
|
|
|
8 |
import yaml
|
9 |
+
|
10 |
def read_yaml(file_path):
|
11 |
with open(file_path, "r") as f:
|
12 |
return yaml.safe_load(f)
|
zeroshot_clf_helper.py
CHANGED
@@ -26,6 +26,10 @@ zs_mlm_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_mdl_dir']
|
|
26 |
zs_mlm_onnx_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_dir']
|
27 |
zs_mlm_onnx_mdl_name=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_name']
|
28 |
|
|
|
|
|
|
|
|
|
29 |
|
30 |
def zero_shot_classification(premise: str, labels: str, model, tokenizer):
|
31 |
"""
|
@@ -67,12 +71,7 @@ def zero_shot_classification(premise: str, labels: str, model, tokenizer):
|
|
67 |
|
68 |
return df
|
69 |
|
70 |
-
|
71 |
-
# zero_shot_classification(premise='Tiny worms and breath analyzers could screen for disease while it’s early and treatable',
|
72 |
-
# labels='science, sports, museum')
|
73 |
-
|
74 |
-
|
75 |
-
def create_onnx_model_zs_nli(zs_onnx_mdl_dir=zs_onnx_mdl_dir):
|
76 |
"""
|
77 |
|
78 |
Args:
|
@@ -86,7 +85,7 @@ def create_onnx_model_zs_nli(zs_onnx_mdl_dir=zs_onnx_mdl_dir):
|
|
86 |
if not os.path.exists(zs_onnx_mdl_dir):
|
87 |
try:
|
88 |
subprocess.run(['python3', '-m', 'transformers.onnx',
|
89 |
-
'--model=
|
90 |
'--feature=sequence-classification',
|
91 |
'--atol=1e-3',
|
92 |
zs_onnx_mdl_dir])
|
@@ -148,8 +147,7 @@ def zero_shot_classification_nli_onnx(premise,labels,_session,_tokenizer,hypothe
|
|
148 |
|
149 |
return df
|
150 |
|
151 |
-
|
152 |
-
def create_onnx_model_zs_mlm(_model, _tokenizer,zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir):
|
153 |
"""
|
154 |
|
155 |
Args:
|
@@ -196,8 +194,31 @@ def zero_shot_classification_fillmask_onnx(premise,hypothesis,labels,_session,_t
|
|
196 |
|
197 |
final_input= f"{premise}.{hypothesis} [MASK]" #this can change depending on chkpt, this is for bert-base-uncased chkpt
|
198 |
|
199 |
-
_inputs=_tokenizer(final_input,padding=True, truncation=True,
|
200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
input_feed={
|
203 |
'input_ids': np.array(_inputs['input_ids']),
|
@@ -205,6 +226,7 @@ def zero_shot_classification_fillmask_onnx(premise,hypothesis,labels,_session,_t
|
|
205 |
'attention_mask': np.array(_inputs['attention_mask'])
|
206 |
}
|
207 |
|
|
|
208 |
output=_session.run(output_names=['logits'],input_feed=dict(input_feed))[0]
|
209 |
|
210 |
mask_token_index = np.argwhere(_inputs["input_ids"] == _tokenizer.mask_token_id)[1,0]
|
|
|
26 |
zs_mlm_onnx_mdl_dir=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_dir']
|
27 |
zs_mlm_onnx_mdl_name=config['ZEROSHOT_MLM']['zs_mlm_onnx_mdl_name']
|
28 |
|
29 |
+
##example
|
30 |
+
# zero_shot_classification(premise='Tiny worms and breath analyzers could screen for disease while it’s early and treatable',
|
31 |
+
# labels='science, sports, museum')
|
32 |
+
|
33 |
|
34 |
def zero_shot_classification(premise: str, labels: str, model, tokenizer):
|
35 |
"""
|
|
|
71 |
|
72 |
return df
|
73 |
|
74 |
+
def create_onnx_model_zs_nli(zs_chkpt,zs_onnx_mdl_dir):
|
|
|
|
|
|
|
|
|
|
|
75 |
"""
|
76 |
|
77 |
Args:
|
|
|
85 |
if not os.path.exists(zs_onnx_mdl_dir):
|
86 |
try:
|
87 |
subprocess.run(['python3', '-m', 'transformers.onnx',
|
88 |
+
f'--model={zs_chkpt}',
|
89 |
'--feature=sequence-classification',
|
90 |
'--atol=1e-3',
|
91 |
zs_onnx_mdl_dir])
|
|
|
147 |
|
148 |
return df
|
149 |
|
150 |
+
def create_onnx_model_zs_mlm(zs_mlm_chkpt,zs_mlm_onnx_mdl_dir):
|
|
|
151 |
"""
|
152 |
|
153 |
Args:
|
|
|
194 |
|
195 |
final_input= f"{premise}.{hypothesis} [MASK]" #this can change depending on chkpt, this is for bert-base-uncased chkpt
|
196 |
|
197 |
+
_inputs=_tokenizer(final_input,padding=True, truncation=True,return_tensors="pt")
|
198 |
+
|
199 |
+
|
200 |
+
## lowers the performance
|
201 |
+
# premise_token_ids=_tokenizer.encode(premise,add_special_tokens=False)
|
202 |
+
# hypothesis_token_ids=_tokenizer.encode(hypothesis,add_special_tokens=False)
|
203 |
+
#
|
204 |
+
# #creating inputs ids
|
205 |
+
# input_ids=[_tokenizer.cls_token_id]+premise_token_ids+[_tokenizer.sep_token_id]+hypothesis_token_ids+[_tokenizer.sep_token_id]
|
206 |
+
# input_ids=np.array(input_ids)
|
207 |
+
#
|
208 |
+
# #creating token type ids
|
209 |
+
# premise_len=len(premise_token_ids)
|
210 |
+
# hypothesis_len=len(hypothesis_token_ids)
|
211 |
+
# token_type_ids=np.array([0]*(premise_len+2)+[1]*(hypothesis_len+1))
|
212 |
+
#
|
213 |
+
# #creating attention mask
|
214 |
+
# attention_mask=np.array([1]*(premise_len+hypothesis_len+3))
|
215 |
+
#
|
216 |
+
# input_feed={
|
217 |
+
# 'input_ids': np.expand_dims(input_ids,axis=0),
|
218 |
+
# 'token_type_ids': np.expand_dims(token_type_ids,0),
|
219 |
+
# 'attention_mask': np.expand_dims(attention_mask,0)
|
220 |
+
# }
|
221 |
+
|
222 |
|
223 |
input_feed={
|
224 |
'input_ids': np.array(_inputs['input_ids']),
|
|
|
226 |
'attention_mask': np.array(_inputs['attention_mask'])
|
227 |
}
|
228 |
|
229 |
+
|
230 |
output=_session.run(output_names=['logits'],input_feed=dict(input_feed))[0]
|
231 |
|
232 |
mask_token_index = np.argwhere(_inputs["input_ids"] == _tokenizer.mask_token_id)[1,0]
|