# Hugging Face Spaces page residue (status: "Sleeping") — kept as a comment so the file parses.
import tensorflow as tf | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
from tensorflow.keras.preprocessing.text import tokenizer_from_json | |
import pandas as pd | |
import re | |
from nltk.stem import WordNetLemmatizer | |
import nltk | |
import json | |
import numpy as np | |
import streamlit as st | |
from fastapi import FastAPI, HTTPException | |
app = FastAPI()

# One-time NLTK data download needed by WordNetLemmatizer.
nltk.download('wordnet')

# Load the trained TensorFlow/Keras sentiment model.
model = tf.keras.models.load_model("my_model.h5")
lemmatizer = WordNetLemmatizer()

# Sequence length the model was trained with; inputs are padded/truncated to this.
maxlen = 41

# Restore the tokenizer fitted at training time.
# NOTE(review): tf.keras `tokenizer_from_json` expects a JSON *string*; passing
# `json.load(f)` works only if tokenizer.json stores the JSON-encoded string
# (i.e. it was written via `json.dump(tokenizer.to_json(), f)`) — confirm how
# the file was saved.
with open('tokenizer.json', 'r', encoding='utf-8') as f:
    tokenizer = tokenizer_from_json(json.load(f))
def preprocessing(text):
    """Clean and lemmatize raw text for the sentiment model.

    Non-string input yields ''. URLs, @mentions/#hashtags and all
    non-alphabetic characters are stripped, newlines and runs of
    whitespace are collapsed, and each word is verb-lemmatized.
    """
    # Ensure the input is a string, otherwise return an empty string
    if not isinstance(text, str):
        return ''
    cleaned_text = re.sub(r'(http|https|www)\S+', '', text)  # Remove URLs
    cleaned_text = re.sub(r'[@#]\w+', '', cleaned_text)  # Remove mentions (like @username) and hashtags
    # BUG FIX: must operate on `cleaned_text`, not the original `text` —
    # the original code discarded the URL/mention removal above.
    cleaned_text = re.sub(r'[^a-zA-Z\s]', '', cleaned_text)  # Keep letters and whitespace only
    cleaned_text = cleaned_text.replace('\n', ' ')
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)  # Collapse repeated whitespace
    words = cleaned_text.split()
    # Verb-lemmatize each remaining word (e.g. "running" -> "run").
    filtered_words = [lemmatizer.lemmatize(word, pos='v') for word in words]
    return ' '.join(filtered_words)
def getPrediction(input):
    """Predict sentiment class indices and confidence scores for a batch of texts.

    Args:
        input: iterable of raw text strings (one per sample).
               (Parameter name kept for caller compatibility, though it
               shadows the builtin `input`.)

    Returns:
        (result, confidence_score): numpy arrays — integer class indices
        (low-confidence predictions forced to 2 = Neutral) and the
        per-sample confidence in [0, 1].
    """
    frame = pd.DataFrame(input, columns=['text'])
    frame['text'] = frame['text'].apply(preprocessing)
    sequences = tokenizer.texts_to_sequences(frame['text'])
    padded = pad_sequences(sequences, maxlen=maxlen, padding='post', truncating='post')
    prediction = model.predict(padded, verbose=0)
    # Confidence = top class probability normalized by the row sum
    # (equal to the max softmax output when rows already sum to 1).
    confidence_score = np.max(prediction, axis=1) / np.sum(prediction, axis=1)
    result = np.argmax(prediction, axis=1)
    # Treat low-confidence predictions as Neutral (class 2) — vectorized,
    # replacing the original per-element loop; debug prints removed so the
    # serving path does not spam stdout.
    result[confidence_score < 0.7] = 2
    return result, confidence_score
def getSentiment(idx):
    """Map a class index to its sentiment label.

    0 -> "Negative", 1 -> "Positive"; anything else (including the
    low-confidence class 2) -> "Neutral".
    """
    if idx == 0:
        return "Negative"
    if idx == 1:
        return "Positive"
    return "Neutral"
async def predict(text: str):
    """Return the sentiment label and confidence for a single text.

    NOTE(review): no `@app.post(...)` decorator is attached, so this is not
    actually registered as a FastAPI route — confirm whether it should be.
    """
    # BUG FIX: getPrediction expects a *list* of texts (it builds a
    # DataFrame from it, as the Streamlit caller does); passing the bare
    # string failed. Also index element [0] instead of formatting whole
    # numpy arrays into the response.
    prediction, confidence_score = getPrediction([text])
    return {
        "prediction": getSentiment(prediction[0]) + " Statement",
        "confidence": f"{float(confidence_score[0]) * 100:.2f}%"
    }
# --- Streamlit front end ---
st.title("Sentiment Analysis")
text = st.text_area("Enter Text...")
if text:
    # Wrap the single entry in a list: getPrediction works on a batch.
    prediction, confidence_score = getPrediction([text])
    # Human-readable response: label plus confidence as a percentage.
    response = {
        "prediction": getSentiment(prediction[0]) + " Statement",
        "confidence": "{:.2f}".format(float(confidence_score[0] * 100)) + "%",
    }
    st.json(response)