Spaces:

ashishraics
/

NLP

Runtime error

App Files Files Community

ashishraics committed on May 14, 2022

Commit

8bb7965

•

1 Parent(s): 356e503

optimize app

Browse files

Files changed (13) hide show

.gitignore +3 -3
app.py +33 -40
sentiment_model_dir/config.json +34 -0
sentiment_model_dir/special_tokens_map.json +1 -0
sentiment_model_dir/tokenizer.json +0 -0
sentiment_model_dir/tokenizer_config.json +1 -0
sentiment_model_dir/vocab.txt +0 -0
zs_model_dir/config.json +58 -0
zs_model_dir/merges.txt +0 -0
zs_model_dir/special_tokens_map.json +1 -0
zs_model_dir/tokenizer.json +0 -0
zs_model_dir/tokenizer_config.json +1 -0
zs_model_dir/vocab.json +0 -0

.gitignore CHANGED Viewed

@@ -1,6 +1,6 @@
 venv/
-sentiment_model_dir/
-sent_mdl_dir/
-zs_model_dir/
 #sent_clf_onnx_dir/
 #zs_onnx_dir/

 venv/
+#exclude model files as they are large
+sentiment_model_dir/pytorch_model.bin
+zs_model_dir/pytorch_model.bin
 #sent_clf_onnx_dir/
 #zs_onnx_dir/

app.py CHANGED Viewed

@@ -103,10 +103,12 @@ def create_model_dir(chkpt, model_dir):
         pass
-st.title("NLP use cases")
 with st.sidebar:
-    st.title("NLP tasks")
     select_task=st.selectbox(label="Select task from drop down menu",
                  options=['README',
                           'Detect Sentiment','Zero Shot Classification'])
@@ -114,7 +116,7 @@ with st.sidebar:
 ############### Pre-Download & instantiate objects for sentiment analysis *********************** START **********************
 # #create model/token dir for sentiment classification for faster inference
-# create_model_dir(chkpt=sent_chkpt, model_dir=sent_mdl_dir)
 @st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
@@ -125,26 +127,26 @@ def sentiment_task_selected(task,
                             sent_onnx_mdl_name=sent_onnx_mdl_name,
                             sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
     #model & tokenizer initialization for normal sentiment classification
-    model_sentiment=AutoModelForSequenceClassification.from_pretrained(sent_chkpt)
-    tokenizer_sentiment=AutoTokenizer.from_pretrained(sent_chkpt)
-    # create onnx model for sentiment classification
-    create_onnx_model_sentiment(_model=model_sentiment, _tokenizer=tokenizer_sentiment)
     #create inference session
     sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}")
     # sentiment_session_quant = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_quant_mdl_name}")
-    return model_sentiment,tokenizer_sentiment,sentiment_session
 ############## Pre-Download & instantiate objects for sentiment analysis ********************* END **********************************
 ############### Pre-Download & instantiate objects for Zero shot clf *********************** START **********************
-# # create model/token dir for zeroshot clf
-# create_model_dir(chkpt=zs_chkpt, model_dir=zs_mdl_dir)
 @st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
 def zs_task_selected(task,
@@ -157,10 +159,11 @@ def zs_task_selected(task,
     ##model & tokenizer initialization for normal ZS classification
     # model_zs=AutoModelForSequenceClassification.from_pretrained(zs_chkpt)
     # we just need tokenizer for inference and not model since onnx model is already saved
-    tokenizer_zs=AutoTokenizer.from_pretrained(zs_chkpt)
-    # create onnx model for zeroshot
-    create_onnx_model_zs()
     #create inference session from onnx model
     zs_session = ort.InferenceSession(f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}")
@@ -172,11 +175,11 @@ def zs_task_selected(task,
 if select_task=='README':
     st.header("NLP Summary")
 if select_task == 'Detect Sentiment':
     t1=time.time()
-    model_sentiment,tokenizer_sentiment,\
-    sentiment_session = sentiment_task_selected(task=select_task)
     t2 = time.time()
     st.write(f"Total time to load Model is {(t2-t1)*1000:.1f} ms")
@@ -185,28 +188,16 @@ if select_task == 'Detect Sentiment':
     c1,c2,_,_=st.columns(4)
     with c1:
-        response1=st.button("Normal runtime")
-    with c2:
-        response2=st.button("ONNX runtime")
-    if any([response1,response2]):
-        if response1:
-            start=time.time()
-            sentiments = classify_sentiment(input_texts,
-                                            model=model_sentiment,
-                                            tokenizer=tokenizer_sentiment
-                                            )
-            end=time.time()
-            st.write(f"Time taken for computation {(end-start)*1000:.1f} ms")
-        elif response2:
-            start = time.time()
-            sentiments=classify_sentiment_onnx(input_texts,
-                                               _session=sentiment_session,
-                                               _tokenizer=tokenizer_sentiment)
-            end = time.time()
-            st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
-        else:
-            pass
         for i,t in enumerate(input_texts.split(',')):
             if sentiments[i]=='Positive':
                 response=st_text_rater(t + f"--> This statement is {sentiments[i]}",
@@ -214,6 +205,8 @@ if select_task == 'Detect Sentiment':
             else:
                 response = st_text_rater(t + f"--> This statement is {sentiments[i]}",
                                          color_background='rgb(233, 116, 81)',key=t)
 if select_task=='Zero Shot Classification':
     t1=time.time()
@@ -228,7 +221,7 @@ if select_task=='Zero Shot Classification':
     c1,_,_,_=st.columns(4)
     with c1:
-        response1=st.button("Compute with ONNX runtime")
     if response1:
         start = time.time()

         pass
+#title using markdown
+st.markdown("<h1 style='text-align: center; color: #3366ff;'>NLP Basic Use Cases</h1>", unsafe_allow_html=True)
+st.markdown("---")
 with st.sidebar:
+    # title using markdown
+    st.markdown("<h1 style='text-align: left; color: ;'>NLP Tasks</h1>", unsafe_allow_html=True)
     select_task=st.selectbox(label="Select task from drop down menu",
                  options=['README',
                           'Detect Sentiment','Zero Shot Classification'])
 ############### Pre-Download & instantiate objects for sentiment analysis *********************** START **********************
 # #create model/token dir for sentiment classification for faster inference
+create_model_dir(chkpt=sent_chkpt, model_dir=sent_mdl_dir)
 @st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
                             sent_onnx_mdl_name=sent_onnx_mdl_name,
                             sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
     #model & tokenizer initialization for normal sentiment classification
+    # model_sentiment=AutoModelForSequenceClassification.from_pretrained(sent_chkpt)
+    # tokenizer_sentiment=AutoTokenizer.from_pretrained(sent_chkpt)
+    tokenizer_sentiment = AutoTokenizer.from_pretrained(sent_mdl_dir)
+    # # create onnx model for sentiment classification but once created in your local app comment this out
+    # create_onnx_model_sentiment(_model=model_sentiment, _tokenizer=tokenizer_sentiment)
     #create inference session
     sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}")
     # sentiment_session_quant = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_quant_mdl_name}")
+    return tokenizer_sentiment,sentiment_session
 ############## Pre-Download & instantiate objects for sentiment analysis ********************* END **********************************
 ############### Pre-Download & instantiate objects for Zero shot clf *********************** START **********************
+# create model/token dir for zeroshot clf -- already created so not required
+create_model_dir(chkpt=zs_chkpt, model_dir=zs_mdl_dir)
 @st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
 def zs_task_selected(task,
     ##model & tokenizer initialization for normal ZS classification
     # model_zs=AutoModelForSequenceClassification.from_pretrained(zs_chkpt)
     # we just need tokenizer for inference and not model since onnx model is already saved
+    # tokenizer_zs=AutoTokenizer.from_pretrained(zs_chkpt)
+    tokenizer_zs = AutoTokenizer.from_pretrained(zs_mdl_dir)
+    # # create onnx model for zeroshot but once created locally comment it out.
+    # create_onnx_model_zs()
     #create inference session from onnx model
     zs_session = ort.InferenceSession(f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}")
 if select_task=='README':
     st.header("NLP Summary")
+    # st.write()
 if select_task == 'Detect Sentiment':
     t1=time.time()
+    tokenizer_sentiment,sentiment_session = sentiment_task_selected(task=select_task)
     t2 = time.time()
     st.write(f"Total time to load Model is {(t2-t1)*1000:.1f} ms")
     c1,c2,_,_=st.columns(4)
     with c1:
+        response1=st.button("Compute (ONNX runtime)")
+    if response1:
+        start = time.time()
+        sentiments=classify_sentiment_onnx(input_texts,
+                                           _session=sentiment_session,
+                                           _tokenizer=tokenizer_sentiment)
+        end = time.time()
+        st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
         for i,t in enumerate(input_texts.split(',')):
             if sentiments[i]=='Positive':
                 response=st_text_rater(t + f"--> This statement is {sentiments[i]}",
             else:
                 response = st_text_rater(t + f"--> This statement is {sentiments[i]}",
                                          color_background='rgb(233, 116, 81)',key=t)
+    else:
+        pass
 if select_task=='Zero Shot Classification':
     t1=time.time()
     c1,_,_,_=st.columns(4)
     with c1:
+        response1=st.button("Compute (ONNX runtime)")
     if response1:
         start = time.time()

sentiment_model_dir/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "finetuning_task": "sst-2",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "NEGATIVE": 0,
+    "POSITIVE": 1
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.18.0",
+  "vocab_size": 30522
+}

sentiment_model_dir/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}

sentiment_model_dir/tokenizer.json ADDED Viewed