hardik-kanzariya committed on
Commit
537db34
1 Parent(s): 0747e23

Upload 4 files

Files changed (5)
  1. .gitattributes +1 -0
  2. app.py +70 -0
  3. my_model.h5 +3 -0
  4. requirements.txt +0 -0
  5. tokenizer.json +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,70 @@
+ import tensorflow as tf
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from tensorflow.keras.preprocessing.text import tokenizer_from_json
+ import pandas as pd
+ import re
+ import nltk
+ from nltk.stem import WordNetLemmatizer
+ import json
+ import numpy as np
+ import streamlit as st
+
+
+ # Load your TensorFlow model
+ model = tf.keras.models.load_model("model/my_model.h5")
+ nltk.download('wordnet', quiet=True)  # the WordNet corpus is required by the lemmatizer
+ lemmatizer = WordNetLemmatizer()
+ maxlen = 41
+ with open('data/tokenizer.json', 'r', encoding='utf-8') as f:
+     tokenizer = tokenizer_from_json(json.load(f))
+
+ def preprocessing(text):
+     # Ensure the input is a string, otherwise return an empty string
+     if not isinstance(text, str):
+         return ''
+
+     cleaned_text = re.sub(r'(http|https|www)\S+', '', text)  # Remove URLs
+     cleaned_text = re.sub(r'[@#]\w+', '', cleaned_text)  # Remove mentions (like @username) and hashtags
+
+     cleaned_text = re.sub(r'[^a-zA-Z\s]', '', cleaned_text)  # Keep letters and whitespace only
+     cleaned_text = cleaned_text.replace('\n', ' ')
+     cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
+
+     cleaned_text = cleaned_text.split()
+     filtered_words = [lemmatizer.lemmatize(word, pos='v') for word in cleaned_text]
+     text = ' '.join(filtered_words)
+     return text
+
+ def getPrediction(input):
+     input = pd.DataFrame(input, columns=['text'])
+     input['text'] = input['text'].apply(preprocessing)
+     print(input['text'][0], end=", ")  # Debug: log the preprocessed text
+     input = tokenizer.texts_to_sequences(input['text'])
+     input = pad_sequences(input, maxlen=maxlen, padding='post', truncating='post')
+     prediction = model.predict(input, verbose=0)
+     # Confidence score: highest class probability relative to the row's total probability
+     confidence_score = np.max(prediction, axis=1) / np.sum(prediction, axis=1)
+     result = np.argmax(prediction, axis=1)
+     for i in range(len(confidence_score)):
+         if confidence_score[i] < 0.7:  # low confidence: fall back to Neutral (index 2)
+             result[i] = 2
+     print(prediction, confidence_score)  # Debug: raw probabilities and confidence
+     return result, confidence_score
+
+ def getSentiment(idx):
+     match idx:
+         case 0:
+             return "Negative"
+         case 1:
+             return "Positive"
+         case _:
+             return "Neutral"
+
+ text = st.text_area("Enter Text...")
+
+ if text:
+     prediction, confidence_score = getPrediction([text])
+     # Convert prediction to a human-readable format
+     response = {"prediction": getSentiment(prediction[0]) + " Statement",
+                 "confidence": "{:.2f}".format(float(confidence_score[0] * 100)) + "%"}
+     st.json(response)
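
Note: the confidence gate in getPrediction remaps any prediction whose top softmax probability falls below 0.7 to the Neutral class (index 2). A minimal sketch of that rule in isolation, assuming a 2-class softmax output as in the app above; the probability rows and label table here are hypothetical, not part of this commit:

    import numpy as np

    # Hypothetical softmax rows (not real model output):
    # a confident Negative and an ambiguous case.
    prediction = np.array([[0.91, 0.09],
                           [0.62, 0.38]])

    # Same rule as getPrediction: top probability over the row total.
    confidence_score = np.max(prediction, axis=1) / np.sum(prediction, axis=1)
    result = np.argmax(prediction, axis=1)
    result[confidence_score < 0.7] = 2  # below-threshold rows become Neutral

    labels = {0: "Negative", 1: "Positive", 2: "Neutral"}
    print([labels[int(i)] for i in result])  # ['Negative', 'Neutral']

Because each softmax row already sums to 1, the score reduces to the top probability itself; the division only matters if the output layer is not normalized.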
my_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:92a22638d088facf637db442e78c890b362c06a05f742799b570bf7ce9e109d3
+ size 314911896
requirements.txt ADDED
Binary file (218 Bytes)
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bce784012044cfa9504c1d5f0f4155e6e2a387335e72f992aa5331b498014c82
+ size 100485313