File size: 999 Bytes
e337348
 
 
 
 
 
 
 
b7e764c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e337348
b7e764c
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import gradio as gr
import pickle
import nltk
from nltk.corpus import stopwords
import string
from sklearn.feature_extraction.text import  TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Load the pickled text vectoriser and classifier used by classify_msg().
# The paths are relative, so they resolve against the current working
# directory. Context managers make sure both file handles are closed as soon
# as the objects are deserialised.
# NOTE: pickle.load can execute arbitrary code while deserialising; only ship
# .pkl files that come from a trusted source.
with open('tfidf.pkl', 'rb') as tfidf_file:
    tfidf = pickle.load(tfidf_file)
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

def classify_msg(Message):
    """Classify one message as spam or not spam.

    The message is cleaned with ``preprocess``, vectorised with the
    module-level ``tfidf`` object and scored by the module-level ``model``.

    Args:
        Message: Raw message text entered by the user.

    Returns:
        ``'Spam'`` when the first predicted label equals 1, otherwise
        ``'Not Spam'``.
    """
    cleaned = preprocess(Message)
    # transform() is given a one-element list: a batch holding this message.
    features = tfidf.transform([cleaned])
    label = model.predict(features)[0]
    if label == 1:
        return 'Spam'
    return 'Not Spam'
    
    
def preprocess(text):
    """Normalise a message before it is vectorised.

    The text is lower-cased and tokenised with ``nltk.word_tokenize``;
    English stop words and punctuation tokens are dropped, and the remaining
    tokens are joined with single spaces.

    Args:
        text: Raw message text.

    Returns:
        The cleaned message as a single space-separated string (empty when
        no token survives the filtering).
    """
    tokens = nltk.word_tokenize(text.lower())
    # Fetch the stop-word list once and keep it in a set: the lookup is the
    # same for every token, so there is no reason to re-fetch and linearly
    # scan the list inside the loop.
    stop_words = set(stopwords.words('english'))
    # `token not in string.punctuation` is a substring test against the
    # punctuation string. It is kept exactly as-is so the output matches what
    # this function produced before (presumably the same preprocessing the
    # pickled vectoriser was fitted on -- verify before changing it).
    kept = [
        token
        for token in tokens
        if token not in stop_words and token not in string.punctuation
    ]
    return ' '.join(kept)


# `gr.inputs` is Gradio's legacy component namespace and is not available in
# newer releases, where components live at the top level (`gr.Textbox`).
# Prefer the top-level class and fall back to the legacy one only when it is
# missing, so the app starts on both old and new Gradio versions.
_Textbox = getattr(gr, 'Textbox', None) or gr.inputs.Textbox

# Single-textbox UI: the entered message goes to classify_msg and the returned
# label is shown as plain text.
iface = gr.Interface(
    fn=classify_msg,
    inputs=_Textbox(placeholder='If your message has more than 50 words, the probability of a correct prediction is high.'),
    outputs="text",
)
    
# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    iface.launch()