Update app.py
app.py CHANGED
```diff
@@ -18,19 +18,19 @@ model = BertForSequenceClassification.from_pretrained("NimaKL/spamd_model")
 token_id = []
 attention_masks = []
 def preprocessing(input_text, tokenizer):
-
+    '''
     Returns <class transformers.tokenization_utils_base.BatchEncoding> with the following fields:
       - input_ids: list of token ids
       - token_type_ids: list of token type ids
      - attention_mask: list of indices (0,1) specifying which tokens should considered by the model (return_attention_mask = True).
-
+    '''
     return tokenizer.encode_plus(
-
-
-
-
-
-
+        input_text,
+        add_special_tokens = True,
+        max_length = 32,
+        pad_to_max_length = True,
+        return_attention_mask = True,
+        return_tensors = 'pt'
     )
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 with col1:
```
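In short, the commit wraps the previously bare docstring text in triple quotes (in the old file those lines sat unquoted inside the function body) and fills in the arguments to `tokenizer.encode_plus`. For context, here is a minimal, self-contained sketch of how the updated `preprocessing` might be exercised end to end. The tokenizer checkpoint (assumed to be hosted alongside the model at `NimaKL/spamd_model`), the sample message, and the label order are assumptions, not part of this commit. Note also that `pad_to_max_length=True` is deprecated in recent transformers releases in favor of `padding='max_length'`; it is kept below only to mirror the committed code.

```python
# Usage sketch (assumptions noted inline); not part of the commit itself.
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Assumption: tokenizer files ship with the same checkpoint as the model.
tokenizer = BertTokenizer.from_pretrained("NimaKL/spamd_model")
model = BertForSequenceClassification.from_pretrained("NimaKL/spamd_model")

def preprocessing(input_text, tokenizer):
    # Mirrors the committed version. pad_to_max_length is deprecated in
    # newer transformers releases; padding='max_length' is the replacement.
    return tokenizer.encode_plus(
        input_text,
        add_special_tokens=True,
        max_length=32,
        pad_to_max_length=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

# Hypothetical sample message for illustration.
encoding = preprocessing("Free entry! Text WIN to claim your prize.", tokenizer)
with torch.no_grad():
    logits = model(
        encoding['input_ids'].to(device),
        attention_mask=encoding['attention_mask'].to(device),
    ).logits
prediction = logits.argmax(dim=-1).item()  # label order assumed, e.g. 0 = ham, 1 = spam
print(prediction)
```

The `max_length=32` cap means longer messages are truncated to 32 tokens, which is a deliberate trade-off for short SMS-style spam inputs.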