# Spaces: Paused (page header captured along with the source; not part of the program)
import streamlit as st | |
from functions_preprocess import LinguisticPreprocessor, download_if_non_existent, CNN, build_vocab | |
import pickle | |
import nltk | |
from datasets import load_dataset | |
import torch | |
# Fetch the NLTK data at start-up: two unconditional downloads first, then
# the resources that are only downloaded when the project helper decides so.
for _package in ('stopwords', 'punkt'):
    nltk.download(_package)

for _resource_path, _package in (
    ('corpora/stopwords', 'stopwords'),
    ('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger'),
    ('corpora/wordnet', 'wordnet'),
):
    download_if_non_existent(_resource_path, _package)
from torchtext.data.utils import get_tokenizer | |
# ------------------------------------------------------------------ Streamlit interface
# Page heading shown at the top of the app.
st.title("Movie Reviews: An NLP Sentiment analysis")
#################################################################### Cache the model loading | |
@st.cache_resource
def load_model():
    """Load the pickled sentiment model from ``sentiment_model.pkl``.

    The path is relative, so the file is looked up in the current working
    directory. The function is wrapped in ``st.cache_resource`` so the file
    is unpickled once and the same object is reused on later Streamlit
    reruns, instead of being re-read on every widget interaction.

    Returns:
        The object stored in the pickle file.

    Raises:
        FileNotFoundError: If ``sentiment_model.pkl`` does not exist.
    """
    model_pkl_file = "sentiment_model.pkl"
    # SECURITY: unpickling runs arbitrary code embedded in the file, so the
    # model file must come from a trusted source.
    with open(model_pkl_file, 'rb') as file:
        return pickle.load(file)
@st.cache_resource
def load_cnn():
    """Build the CNN classifier and load its weights from ``model_cnn.pkl``.

    The weights are mapped onto the CPU and the model is switched to
    evaluation mode before it is returned. The function is wrapped in
    ``st.cache_resource`` so the network is built and its weights read once,
    then reused on later Streamlit reruns.

    Returns:
        The ``CNN`` instance in evaluation mode.
    """
    # NOTE(review): the meaning of these positional arguments is defined by
    # the project's CNN class (not visible here); they presumably have to
    # match the configuration the saved weights were trained with - confirm.
    model = CNN(16236, 300, 128, [3, 8], 0.5, 2)
    state_dict = torch.load('model_cnn.pkl', map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.eval()
    return model
def predict_sentiment(text, model, vocab=None, torch_text=False):
    """Predict the sentiment of ``text`` with one of the two app models.

    Args:
        text: The review text to classify.
        model: With ``torch_text`` true, a torch module that is called on a
            ``(1, n_tokens)`` tensor of token ids and returns per-class
            scores. Otherwise any object exposing ``predict``.
        vocab: Token-to-id lookup indexed as ``vocab[token]``. Only needed
            when ``torch_text`` is true, so it is optional.
        torch_text: Select the torch (tokenize + encode) path when true and
            the ``model.predict`` path when false.

    Returns:
        For the torch path, the predicted class index as an ``int``.
        Otherwise, whatever ``model.predict([text])`` returns.

    Raises:
        ValueError: If ``torch_text`` is true but no ``vocab`` was given.
    """
    if torch_text and vocab is None:
        raise ValueError("vocab is required when torch_text is True")

    # NOTE(review): the value returned by transform() is discarded, exactly
    # as in the original code, so the models receive the raw text. Whether
    # the transformed text should be used instead depends on the
    # LinguisticPreprocessor contract, which is not visible here - confirm.
    processor.transform(text)

    if not torch_text:
        return model.predict([text])

    tokenizer = get_tokenizer("basic_english")
    encoded = [vocab[token] for token in tokenizer(text)]

    # Put the input on the device the model lives on. The original code
    # referenced a global ``device`` that is not defined in this file.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    input_tensor = torch.tensor(encoded, dtype=torch.long).unsqueeze(0).to(device)

    model.eval()  # Evaluation mode
    with torch.no_grad():  # No gradient needed for inference
        outputs = model(input_tensor)
    probs = torch.softmax(outputs, dim=1)
    return torch.argmax(probs, dim=1).item()
@st.cache_resource
def _load_training_resources():
    """Load the Rotten Tomatoes training split and build the vocabulary.

    Wrapped in ``st.cache_resource`` so the dataset is loaded and the
    vocabulary is built once, then reused on later Streamlit reruns instead
    of being recomputed on every widget interaction.

    Returns:
        A ``(train_data, vocab, tokenizer)`` tuple, where ``vocab`` and
        ``tokenizer`` are the two values returned by ``build_vocab``.
    """
    train_split = load_dataset('rotten_tomatoes', split='train')
    built_vocab, built_tokenizer = build_vocab(train_split)
    return train_split, built_vocab, built_tokenizer


# Shared objects used by the UI code further down.
model_1 = load_model()
model_2 = load_cnn()
processor = LinguisticPreprocessor()
train_data, vocab, tokenizer = _load_training_resources()
############################################################# Text input
def _render_sentiment_section(title, key_prefix, predict):
    """Render one expander with a text box, an Analyze button and the result.

    Args:
        title: Label of the expander.
        key_prefix: Prefix for the widget keys; the text area uses
            ``<prefix>_input`` and the button ``<prefix>_button``.
        predict: Callable that takes the entered text and returns the
            prediction; a value ``>= 0.5`` is reported as positive.
    """
    with st.expander(title):
        st.markdown("Give it a go by writing a positive or negative text, and analyze it!")
        # Text input inside the expander
        user_input = st.text_area("Enter text here...", key=f'{key_prefix}_input')
        if not st.button('Analyze', key=f'{key_prefix}_button'):
            return
        # Nothing to classify: tell the user instead of calling the model
        # with empty text.
        if not user_input.strip():
            st.warning('Please enter some text to analyze.')
            return
        # Displaying output. ``st.write`` takes no ``key`` argument, so none
        # is passed. The emoji replace a mis-encoded character ('π') in the
        # original strings; the original emoji could not be recovered.
        result = predict(user_input)
        if result >= 0.5:
            st.write('The sentiment is: Positive 😀')
        else:
            st.write('The sentiment is: Negative 😞')


# ``vocab`` is passed for both models: predict_sentiment takes it as its
# third positional argument, and only the CNN path actually reads it.
_render_sentiment_section(
    "Model 1: SGD Classifier",
    'model1',
    lambda text: predict_sentiment(text, model_1, vocab),
)
_render_sentiment_section(
    "Model 2: CNN Sentiment analysis",
    'model2',
    lambda text: predict_sentiment(text, model_2, vocab, torch_text=True),
)

st.caption("Por @efeperro.")
# ``stopwords`` is never imported in this file, so the corpus is reached
# through the already-imported ``nltk`` package instead.
# NOTE(review): nothing visible in this file reads ``stop_words``; this line
# may belong to a different module - confirm before relying on it.
stop_words = set(nltk.corpus.stopwords.words('english'))