import tensorflow as tf from tensorflow.keras.preprocessing.sequence import pad_sequences from tensorflow.keras.preprocessing.text import tokenizer_from_json import pandas as pd import re from nltk.stem import WordNetLemmatizer import nltk import json import numpy as np import streamlit as st nltk.download('wordnet') # Load your TensorFlow model model = tf.keras.models.load_model("my_model.h5") lemmatizer=WordNetLemmatizer() maxlen = 41 with open('tokenizer.json', 'r', encoding='utf-8') as f: tokenizer = tokenizer_from_json(json.load(f)) def preprocessing(text): # Ensure the input is a string, otherwise return an empty string if not isinstance(text, str): return '' cleaned_text = re.sub(r'(http|https|www)\S+', '', text) # Remove URLs cleaned_text = re.sub(r'[@#]\w+', '', cleaned_text) # Remove mentions (like @username) and hashtgs cleaned_text = re.sub(r'[^a-zA-Z\s]', '', text) cleaned_text = cleaned_text.replace('\n', ' ') cleaned_text = re.sub(r'\s+', ' ', cleaned_text) cleaned_text = cleaned_text.split() filtered_words = [lemmatizer.lemmatize(word, pos='v') for word in cleaned_text] text = ' '.join(filtered_words) return text def getPrediction(input): input = pd.DataFrame(input, columns=['text']) input['text'] = input['text'].apply(preprocessing) print(input['text'][0], end=", ") input = tokenizer.texts_to_sequences(input['text']) input = pad_sequences(input, maxlen = maxlen, padding = 'post', truncating = 'post') prediction = model.predict(input, verbose=0) # calculate confidence score confidence_score = np.max(prediction, axis=1)/np.sum(prediction, axis=1) result = np.argmax(prediction, axis=1) for i in range(len(confidence_score)): if confidence_score[i] < 0.7: result[i] = 2 print(prediction, confidence_score) return result, confidence_score def getSentiment(idx): match idx: case 0: return "Negative" case 1: return "Positive" case default: return "Neutral" text = st.text_area("Enter Text...") if text: prediction, confidence_score = getPrediction([text]) # Modify if preprocessing is needed # Convert prediction to a human-readable format response = {"prediction": getSentiment(prediction[0]) + " Statement", "confidence": "{:.2f}".format(float(confidence_score[0] * 100)) + "%"} # Adjust as necessary for output formatting st.json(response)