Spaces:
hardik-kanzariya committed · Commit 537db34
Parent(s): 0747e23

Upload 4 files

Browse files:
- .gitattributes +1 -0
- app.py +68 -0
- my_model.h5 +3 -0
- requirements.txt +0 -0
- tokenizer.json +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,68 @@
+import tensorflow as tf
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.preprocessing.text import tokenizer_from_json
+import pandas as pd
+import re
+from nltk.stem import WordNetLemmatizer
+import json
+import numpy as np
+import streamlit as st
+
+
+# Load the trained TensorFlow model and tokenizer; this commit uploads
+# both files at the repo root, so load them from there.
+model = tf.keras.models.load_model("my_model.h5")
+lemmatizer = WordNetLemmatizer()
+maxlen = 41  # sequence length the model expects
+with open('tokenizer.json', 'r', encoding='utf-8') as f:
+    # tokenizer_from_json expects the raw JSON string, not a parsed dict
+    tokenizer = tokenizer_from_json(f.read())
+
+def preprocessing(text):
+    # Ensure the input is a string, otherwise return an empty string
+    if not isinstance(text, str):
+        return ''
+
+    cleaned_text = re.sub(r'(http|https|www)\S+', '', text)  # Remove URLs
+    cleaned_text = re.sub(r'[@#]\w+', '', cleaned_text)      # Remove mentions (like @username) and hashtags
+
+    # Keep letters and whitespace only; operate on cleaned_text (not the
+    # original text) so the URL/mention removal above is preserved
+    cleaned_text = re.sub(r'[^a-zA-Z\s]', '', cleaned_text)
+    cleaned_text = cleaned_text.replace('\n', ' ')
+    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
+
+    words = cleaned_text.split()
+    filtered_words = [lemmatizer.lemmatize(word, pos='v') for word in words]
+    return ' '.join(filtered_words)
+
+def getPrediction(texts):
+    df = pd.DataFrame(texts, columns=['text'])
+    df['text'] = df['text'].apply(preprocessing)
+    print(df['text'][0], end=", ")  # debug: log the cleaned first input
+    seqs = tokenizer.texts_to_sequences(df['text'])
+    seqs = pad_sequences(seqs, maxlen=maxlen, padding='post', truncating='post')
+    prediction = model.predict(seqs, verbose=0)
+    # Confidence score: top class probability normalised by the row sum
+    confidence_score = np.max(prediction, axis=1) / np.sum(prediction, axis=1)
+    result = np.argmax(prediction, axis=1)
+    # Fall back to the Neutral class (index 2) when confidence is low
+    for i in range(len(confidence_score)):
+        if confidence_score[i] < 0.7:
+            result[i] = 2
+    print(prediction, confidence_score)  # debug
+    return result, confidence_score
+
+def getSentiment(idx):
+    match idx:
+        case 0:
+            return "Negative"
+        case 1:
+            return "Positive"
+        case _:  # wildcard pattern; `case default:` would capture anything into a name, not act as a keyword
+            return "Neutral"
+
+text = st.text_area("Enter Text...")
+
+if text:
+    prediction, confidence_score = getPrediction([text])
+    # Convert the prediction to a human-readable response
+    response = {"prediction": getSentiment(prediction[0]) + " Statement",
+                "confidence": "{:.2f}".format(float(confidence_score[0] * 100)) + "%"}
+    st.json(response)
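A note on a runtime dependency: WordNetLemmatizer relies on NLTK's WordNet corpus, which does not ship with the nltk package itself. If the Space image does not pre-bundle it, the first lemmatize call will fail; a minimal one-time setup sketch (not part of this commit):

    import nltk

    # Download the WordNet data used by WordNetLemmatizer.
    # Safe to run on every start: it is a no-op once the corpus is present.
    nltk.download('wordnet')

And a quick sanity check of the cleaning pipeline, with app.py's preprocessing function in scope (the sample tweet is made up; the output reflects pos='v' lemmatization of lowercase tokens):

    sample = "loving the new update!!! https://t.co/xyz @dev #release"
    print(preprocessing(sample))
    # -> "love the new update"  (URL, mention, hashtag and punctuation stripped,
    #    verbs lemmatized)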
my_model.h5 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92a22638d088facf637db442e78c890b362c06a05f742799b570bf7ce9e109d3
+size 314911896
requirements.txt ADDED
Binary file (218 Bytes)
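The requirements file is stored as a binary blob here, so its exact contents are not visible in the diff. Judging purely from app.py's imports, an equivalent file would need at least the following (package list inferred, committed versions unknown):

    tensorflow
    streamlit
    pandas
    numpy
    nltk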
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bce784012044cfa9504c1d5f0f4155e6e2a387335e72f992aa5331b498014c82
+size 100485313
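Both large files are committed as Git LFS pointers: the pointer records only the spec version, a sha256 oid of the real blob, and its size, while the blob itself lives in LFS storage. Since the oid is simply the SHA-256 of the file contents, a locally downloaded copy can be verified against the pointers above (a sketch; filenames are the ones from this commit):

    import hashlib

    # The LFS oid is the SHA-256 of the file's contents, hashed in chunks
    # so large files do not need to fit in memory.
    def lfs_oid(path, chunk_size=1 << 20):
        h = hashlib.sha256()
        with open(path, 'rb') as f:
            while block := f.read(chunk_size):
                h.update(block)
        return h.hexdigest()

    print(lfs_oid('my_model.h5'))     # expect 92a22638d088f...e109d3
    print(lfs_oid('tokenizer.json'))  # expect bce784012044c...014c82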