import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import pandas as pd
import re
from nltk.stem import WordNetLemmatizer
import nltk
import json
import numpy as np
import streamlit as st
from fastapi import FastAPI, HTTPException
app = FastAPI()
nltk.download('wordnet')  # WordNet data required by the lemmatizer

# Load the trained TensorFlow model
model = tf.keras.models.load_model("my_model.h5")

lemmatizer = WordNetLemmatizer()
maxlen = 41  # padding length; must match the sequence length the model was trained on
with open('tokenizer.json', 'r', encoding='utf-8') as f:
    tokenizer = tokenizer_from_json(json.load(f))
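# Note: json.load() is used here on the assumption that the tokenizer was saved as a
# JSON string literal at training time, e.g. (hypothetical save code, not from this file):
#
#     with open('tokenizer.json', 'w', encoding='utf-8') as f:
#         json.dump(tokenizer.to_json(), f)
#
# tokenizer_from_json() then parses the string returned by json.load().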
def preprocessing(text):
    # Ensure the input is a string, otherwise return an empty string
    if not isinstance(text, str):
        return ''
    cleaned_text = re.sub(r'(http|https|www)\S+', '', text)   # Remove URLs
    cleaned_text = re.sub(r'[@#]\w+', '', cleaned_text)        # Remove mentions (like @username) and hashtags
    cleaned_text = re.sub(r'[^a-zA-Z\s]', '', cleaned_text)    # Keep letters and whitespace only
    cleaned_text = cleaned_text.replace('\n', ' ')
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)           # Collapse repeated whitespace
    words = cleaned_text.split()
    lemmatized_words = [lemmatizer.lemmatize(word, pos='v') for word in words]
    return ' '.join(lemmatized_words)
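# Illustrative example (not part of the original code): with the URL, mention, hashtag,
# and punctuation stripped and verbs lemmatized, an input such as
#     preprocessing("I am running to http://example.com @user #fast!")
# should come out roughly as "I be run to".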
def getPrediction(texts):
    # Wrap the raw texts in a DataFrame and clean each entry
    df = pd.DataFrame(texts, columns=['text'])
    df['text'] = df['text'].apply(preprocessing)
    print(df['text'][0], end=", ")  # debug: first cleaned text
    # Convert to padded integer sequences of length `maxlen`
    sequences = tokenizer.texts_to_sequences(df['text'])
    padded = pad_sequences(sequences, maxlen=maxlen, padding='post', truncating='post')
    prediction = model.predict(padded, verbose=0)
    # Confidence = highest class score relative to the sum of all class scores
    confidence_score = np.max(prediction, axis=1) / np.sum(prediction, axis=1)
    result = np.argmax(prediction, axis=1)
    # Predictions below the 0.7 confidence threshold fall back to Neutral (index 2)
    for i in range(len(confidence_score)):
        if confidence_score[i] < 0.7:
            result[i] = 2
    print(prediction, confidence_score)  # debug: raw scores and confidences
    return result, confidence_score
def getSentiment(idx):
    return {0: "Negative", 1: "Positive", 2: "Neutral"}.get(idx, "Neutral")
@app.post("/predict")
async def predict(text: str):
    # getPrediction expects a list of texts and returns arrays, so wrap the input and index the result
    prediction, confidence_score = getPrediction([text])
    return {
        "prediction": getSentiment(int(prediction[0])) + " Statement",
        "confidence": f"{float(confidence_score[0]) * 100:.2f}%"
    }
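# Example client call (illustrative; the host and port are assumptions, not part of the
# original code). Because `text` is a plain str parameter, FastAPI reads it from the
# query string:
#
#     import requests
#     resp = requests.post("http://localhost:8000/predict", params={"text": "I love this!"})
#     print(resp.json())  # -> {"prediction": "... Statement", "confidence": "...%"}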
# Streamlit UI
st.title("Sentiment Analysis")
text = st.text_area("Enter Text...")
if text:
    prediction, confidence_score = getPrediction([text])
    # Convert the prediction to a human-readable response
    response = {
        "prediction": getSentiment(int(prediction[0])) + " Statement",
        "confidence": f"{float(confidence_score[0]) * 100:.2f}%"
    }
    st.json(response)
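# How to run (assumed; the module/file names below are placeholders, not from the original
# code). The file defines both a FastAPI app and a Streamlit UI, which are started by
# different servers:
#
#     uvicorn app:app --reload    # serves the /predict endpoint
#     streamlit run app.py        # serves the Streamlit interface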