NimaKL committed on
Commit d1d30b7 • 1 Parent(s): c6f6664

Update app.py

Files changed (1)
  1. app.py +24 -24
app.py CHANGED
@@ -4,7 +4,9 @@ from textblob import TextBlob
 from transformers import BertForSequenceClassification, AdamW, BertConfig
 st.set_page_config(layout='wide', initial_sidebar_state='expanded')
 col1, col2 = st.columns(2)
-
+with col2:
+    text = st.text_input("Enter the text you'd like to analyze for spam.")
+    aButton = st.button('Analyze')
 with col1:
     st.title("Spamd: Turkish Spam Detector")
     st.markdown("Message spam detection tool for the Turkish language. Due to the small size of the dataset, I decided to go with transformer technology (Google BERT). Using the Turkish pre-trained model BERTurk, I improved the accuracy of the tool by 18 percent compared to the previous model, which used fastText.")
@@ -34,30 +36,28 @@ def preprocessing(input_text, tokenizer):
         return_tensors = 'pt'
     )
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-with col1:
-    st.success("Model Loaded!")
-def predict(new_sentence):
+
+def predict(new_sentence):
     # We need Token IDs and Attention Mask for inference on the new sentence
-    test_ids = []
-    test_attention_mask = []
-    # Apply the tokenizer
-    encoding = preprocessing(new_sentence, tokenizer)
-    #Extract IDs and Attention Mask
-    test_ids.append(encoding['input_ids'])
-    test_attention_mask.append(encoding['attention_mask'])
-    test_ids = torch.cat(test_ids, dim = 0)
-    test_attention_mask = torch.cat(test_attention_mask, dim = 0)
-    #Forward pass, calculate logit predictions
-    with torch.no_grad():
-        output = model(test_ids.to(device), token_type_ids = None, attention_mask = test_attention_mask.to(device))
-    prediction = 'Spam' if np.argmax(output.logits.cpu().numpy()).flatten().item() == 1 else 'Normal'
-    pred = 'Predicted Class: '+ prediction
-    return pred
-if text or aButton:
-    st.text_input("Enter the text you'd like to analyze for spam.")
-    st.button('Analyze')
-    with col2:
-        st.header(predict(text))
+    test_ids = []
+    test_attention_mask = []
+    # Apply the tokenizer
+    encoding = preprocessing(new_sentence, tokenizer)
+    # Extract IDs and Attention Mask
+    test_ids.append(encoding['input_ids'])
+    test_attention_mask.append(encoding['attention_mask'])
+    test_ids = torch.cat(test_ids, dim = 0)
+    test_attention_mask = torch.cat(test_attention_mask, dim = 0)
+    # Forward pass, calculate logit predictions
+    with torch.no_grad():
+        output = model(test_ids.to(device), token_type_ids = None, attention_mask = test_attention_mask.to(device))
+    prediction = 'Spam' if np.argmax(output.logits.cpu().numpy()).flatten().item() == 1 else 'Normal'
+    pred = 'Predicted Class: '+ prediction
+    return pred
+
+if text or aButton:
+    with col2:
+        st.header(predict(text))
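Note for readers of this diff: the hunks above use tokenizer, model, and preprocessing(), which are defined earlier in app.py and fall outside the changed lines. A minimal sketch of what that surrounding setup plausibly looks like for a BERTurk-based classifier follows; the checkpoint name, max_length, and encode_plus arguments here are assumptions for illustration, not taken from this commit.

# Hypothetical sketch of the setup assumed by the diff -- not part of this commit.
import torch
from transformers import AutoTokenizer, BertForSequenceClassification

MODEL_NAME = "dbmdz/bert-base-turkish-cased"  # assumed BERTurk checkpoint; the real app would load its fine-tuned spam weights

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = BertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
model.eval()

def preprocessing(input_text, tokenizer):
    # Return token IDs and attention mask as PyTorch tensors,
    # matching the `return_tensors = 'pt'` context line in the second hunk.
    return tokenizer.encode_plus(
        input_text,
        add_special_tokens=True,
        max_length=32,               # assumed value
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

With that context in mind, the change itself moves the text box and Analyze button into col2 so that text and aButton exist before predict(text) is called, and drops the duplicate st.text_input/st.button calls from the if text or aButton: block.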