Spaces:

NimaKL
/

spamd

Build error

App Files Files Community

NimaKL committed on Oct 3, 2022

Commit

25cba84

•

1 Parent(s): 00addfe

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -48

app.py CHANGED Viewed

@@ -11,53 +11,53 @@ with col1:
 if st.button('Load Model', disabled=False):
     with st.spinner('Wait for it...'):
-    import torch
-    import numpy as np
-    from transformers import AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-uncased")
-    from transformers import AutoModel
-    model = BertForSequenceClassification.from_pretrained("NimaKL/spamd_model")
-    token_id = []
-    attention_masks = []
-    def preprocessing(input_text, tokenizer):
-    '''
-              Returns <class transformers.tokenization_utils_base.BatchEncoding> with the following fields:
-                - input_ids: list of token ids
-                - token_type_ids: list of token type ids
-                - attention_mask: list of indices (0,1) specifying which tokens should considered by the model (return_attention_mask = True).
-    '''
-        return tokenizer.encode_plus(
-        input_text,
-        add_special_tokens = True,
-        max_length = 32,
-        pad_to_max_length = True,
-        return_attention_mask = True,
-        return_tensors = 'pt'
-        )
-    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    st.success("Model Loaded!")
-        def predict(new_sentence):
-            # We need Token IDs and Attention Mask for inference on the new sentence
-            test_ids = []
-            test_attention_mask = []
-            # Apply the tokenizer
-            encoding = preprocessing(new_sentence, tokenizer)
-            # Extract IDs and Attention Mask
-            test_ids.append(encoding['input_ids'])
-            test_attention_mask.append(encoding['attention_mask'])
-            test_ids = torch.cat(test_ids, dim = 0)
-            test_attention_mask = torch.cat(test_attention_mask, dim = 0)
-            # Forward pass, calculate logit predictions
-                with torch.no_grad():
-                    output = model(test_ids.to(device), token_type_ids = None, attention_mask = test_attention_mask.to(device))
-                    prediction = 'Spam' if np.argmax(output.logits.cpu().numpy()).flatten().item() == 1 else 'Normal'
-                    pred = 'Predicted Class: '+ prediction
-                    with col2:
-                        st.header(pred)
-                        text = st.text_input("Enter the text you'd like to analyze for spam.")
-                        if text or st.button('Analyze'):
-                            predict(text)

 if st.button('Load Model', disabled=False):
     with st.spinner('Wait for it...'):
+        import torch
+        import numpy as np
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-uncased")
+        from transformers import AutoModel
+        model = BertForSequenceClassification.from_pretrained("NimaKL/spamd_model")
+        token_id = []
+        attention_masks = []
+        def preprocessing(input_text, tokenizer):
+        '''
+                  Returns <class transformers.tokenization_utils_base.BatchEncoding> with the following fields:
+                    - input_ids: list of token ids
+                    - token_type_ids: list of token type ids
+                    - attention_mask: list of indices (0,1) specifying which tokens should considered by the model (return_attention_mask = True).
+        '''
+            return tokenizer.encode_plus(
+            input_text,
+            add_special_tokens = True,
+            max_length = 32,
+            pad_to_max_length = True,
+            return_attention_mask = True,
+            return_tensors = 'pt'
+            )
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        st.success("Model Loaded!")
+            def predict(new_sentence):
+                # We need Token IDs and Attention Mask for inference on the new sentence
+                test_ids = []
+                test_attention_mask = []
+                # Apply the tokenizer
+                encoding = preprocessing(new_sentence, tokenizer)
+                # Extract IDs and Attention Mask
+                test_ids.append(encoding['input_ids'])
+                test_attention_mask.append(encoding['attention_mask'])
+                test_ids = torch.cat(test_ids, dim = 0)
+                test_attention_mask = torch.cat(test_attention_mask, dim = 0)
+                # Forward pass, calculate logit predictions
+                    with torch.no_grad():
+                        output = model(test_ids.to(device), token_type_ids = None, attention_mask = test_attention_mask.to(device))
+                        prediction = 'Spam' if np.argmax(output.logits.cpu().numpy()).flatten().item() == 1 else 'Normal'
+                        pred = 'Predicted Class: '+ prediction
+                        with col2:
+                            st.header(pred)
+                            text = st.text_input("Enter the text you'd like to analyze for spam.")
+                            if text or st.button('Analyze'):
+                                predict(text)