nguyennghia0902 committed on
Commit
b64accf
1 Parent(s): e3efb90

Upload all files without model

Homepage.py ADDED
@@ -0,0 +1,43 @@
+ import streamlit as st
+ from st_pages import Page, show_pages
+
+ st.set_page_config(page_title="Sentiment Analysis", page_icon="🏠")
+
+ show_pages(
+     [
+         Page("streamlit_app.py/Homepage.py", "Home", "🏠"),
+         Page(
+             "streamlit_app.py/pages/Sentiment_Analysis.py", "Sentiment Analysis", "📝"
+         ),
+     ]
+ )
+
+ st.title("Seminar Công nghệ Tri thức - Transformer trong NLP")
+ st.markdown(
+     """
+ **Team members:**
+ | Student ID | Full Name |
+ | ---------- | ------------------------ |
+ | 19120600 | Bùi Nguyên Nghĩa |
+ | 19120607 | Phạm Thị Nguyệt |
+ """
+ )
+
+ st.header("The Need for Sentiment Analysis")
+ st.markdown(
+     """
+ Sentiment analysis algorithms are used to detect sentiment in a comment or a review.
+ It is said that around 90% of consumers read online reviews before visiting a business or buying a product.
+ These reviews can be positive, negative, or neutral, and it is important to know what customers are saying about your business.
+ """
+ )
+
+ st.header("Technology used")
+ st.markdown(
+     """
+ In this demo, we used BERT as the model for sentiment analysis. BERT is a transformer-based model that was proposed by Google in 2018.
+ It is a pre-trained model that can be used for various NLP tasks such as sentiment analysis, question answering, etc.
+ """
+ )
+
+
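For reference, a minimal local-run sketch (not part of this commit): it assumes the packages listed in requirements.txt are installed and that the repository root is the working directory, where streamlit_app.py is a directory rather than a module. It simply launches Streamlit on the entry script registered above.

# Hypothetical launcher for local testing; "python -m streamlit run <script>" is
# the standard CLI invocation, wrapped here so the path assumption is explicit.
import subprocess
import sys

ENTRY_POINT = "streamlit_app.py/Homepage.py"  # same path as app_file in README.md

subprocess.run(
    [sys.executable, "-m", "streamlit", "run", ENTRY_POINT],
    check=True,
)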
README.md CHANGED
@@ -1,13 +1,12 @@
  ---
- title: SentimentAnalysis UsingBERT
- emoji: 💻
- colorFrom: yellow
- colorTo: indigo
+ title: Sentiment Detection Using Bert
+ emoji: 🐨
+ colorFrom: gray
+ colorTo: gray
  sdk: streamlit
- sdk_version: 1.25.0
- app_file: app.py
+ sdk_version: 1.21.0
+ app_file: streamlit_app.py/Homepage.py
  pinned: false
- license: openrail
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Sentiment_Analysis.py ADDED
@@ -0,0 +1,117 @@
+ from os import path
+ import streamlit as st
+
+ # import pickle
+
+ # from tensorflow import keras
+ import tensorflow as tf
+ import torch
+ from torch import nn
+ from transformers import BertModel, BertTokenizer
+
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ MODEL_NAME = "bert-base-cased"
+ MODEL_PATH = path.join(path.dirname(__file__), "bert_model.h5")
+
+
+ # Build the Sentiment Classifier class
+ class SentimentClassifier(nn.Module):
+     # Constructor
+     def __init__(self, n_classes):
+         super(SentimentClassifier, self).__init__()
+         self.bert = BertModel.from_pretrained(MODEL_NAME)
+         self.drop = nn.Dropout(p=0.3)
+         self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
+
+     # Forward propagation
+     def forward(self, input_ids, attention_mask):
+         _, pooled_output = self.bert(
+             input_ids=input_ids, attention_mask=attention_mask, return_dict=False
+         )
+         # Apply dropout to the pooled output
+         output = self.drop(pooled_output)
+         return self.out(output)
+
+
+ @st.cache_resource
+ def load_model_and_tokenizer():
+     model = SentimentClassifier(3).to(device)
+     model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
+     model.eval()
+     return model, BertTokenizer.from_pretrained(MODEL_NAME)
+
+
+ def predict(content):
+     model, tokenizer = load_model_and_tokenizer()
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     encoded_review = tokenizer.encode_plus(
+         content,
+         max_length=160,
+         add_special_tokens=True,
+         return_token_type_ids=False,
+         pad_to_max_length=True,
+         return_attention_mask=True,
+         return_tensors="pt",
+     )
+
+     input_ids = encoded_review["input_ids"].to(device)
+     attention_mask = encoded_review["attention_mask"].to(device)
+
+     output = model(input_ids, attention_mask)
+     _, prediction = torch.max(output, dim=1)
+
+     class_names = ["negative", "neutral", "positive"]
+
+     return class_names[prediction]
+
+
+ def main():
+     st.set_page_config(page_title="Sentiment Analysis", page_icon="📝")
+
+     # Give the page a title
+     st.title("Sentiment analysis")
+     contents = st.text_area(
+         "Please enter reviews/sentiments/sentences/contents:",
+         placeholder="Enter your text here",
+         height=200,
+     )
+
+     prediction = ""
+
+     # Create a prediction button
+     if st.button("Analyze Sentiment"):
+         stripped = contents.strip()
+         if not stripped:
+             st.error("Please enter some text.")
+             return
+
+         prediction = predict(contents)
+         if prediction == "positive":
+             st.success("This is positive 😄")
+         elif prediction == "negative":
+             st.error("This is negative 😟")
+         else:
+             st.warning("This is neutral 🙂")
+
+     upload_file = st.file_uploader("Or upload a file", type=["txt"])
+     if upload_file is not None:
+         contents = upload_file.read().decode("utf-8")
+
+         for line in contents.splitlines():
+             line = line.strip()
+             if not line:
+                 continue
+
+             prediction = predict(line)
+             if prediction == "positive":
+                 st.success(line + "\n\nThis is positive 😄")
+             elif prediction == "negative":
+                 st.error(line + "\n\nThis is negative 😟")
+             else:
+                 st.warning(line + "\n\nThis is neutral 🙂")
+
+
+ if __name__ == "__main__":
+     main()
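Because the fine-tuned weights (bert_model.h5) are deliberately not part of this commit, here is a minimal sketch of the inference path that predict() implements, using the public bert-base-cased checkpoint and a randomly initialized 3-class head. It shows the tensor flow only; the printed label is meaningless until the real state dict is loaded.

# Standalone sketch of the tokenizer -> BERT -> linear-head path used by predict().
# Assumptions: transformers and torch installed; no fine-tuned weights loaded,
# so the resulting label is arbitrary.
import torch
from transformers import BertModel, BertTokenizer

MODEL_NAME = "bert-base-cased"
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
bert = BertModel.from_pretrained(MODEL_NAME)
head = torch.nn.Linear(bert.config.hidden_size, 3)  # negative / neutral / positive

encoded = tokenizer(
    "The food was great but the service was slow.",
    max_length=160,
    padding="max_length",   # non-deprecated equivalent of pad_to_max_length=True
    truncation=True,
    return_tensors="pt",
)

with torch.no_grad():
    # pooled_output: [batch, hidden_size] summary derived from the [CLS] token
    _, pooled_output = bert(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        return_dict=False,
    )
    logits = head(pooled_output)  # [batch, 3] class scores

label = ["negative", "neutral", "positive"][int(logits.argmax(dim=1))]
print(label)  # arbitrary without the trained head's weights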
bert-sentiment-analysis.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
dataset/reviews.csv ADDED
The diff for this file is too large to render. See raw diff
 
gitattributes.txt ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ transformers
+ numpy
+ pandas
+ seaborn
+ matplotlib
+ scikit-learn
+ torch
+ tensorflow
+ streamlit
+ st-pages
streamlit_app.py/Homepage.py ADDED
@@ -0,0 +1,43 @@
+ import streamlit as st
+ from st_pages import Page, show_pages
+
+ st.set_page_config(page_title="Sentiment Analysis", page_icon="🏠")
+
+ show_pages(
+     [
+         Page("streamlit_app.py/Homepage.py", "Home", "🏠"),
+         Page(
+             "streamlit_app.py/pages/Sentiment_Analysis.py", "Sentiment Analysis", "📝"
+         ),
+     ]
+ )
+
+ st.title("Seminar Công nghệ Tri thức - Transformer trong NLP")
+ st.markdown(
+     """
+ **Team members:**
+ | Student ID | Full Name |
+ | ---------- | ------------------------ |
+ | 19120600 | Bùi Nguyên Nghĩa |
+ | 19120607 | Phạm Thị Nguyệt |
+ """
+ )
+
+ st.header("The Need for Sentiment Analysis")
+ st.markdown(
+     """
+ Sentiment analysis algorithms are used to detect sentiment in a comment or a review.
+ It is said that around 90% of consumers read online reviews before visiting a business or buying a product.
+ These reviews can be positive, negative, or neutral, and it is important to know what customers are saying about your business.
+ """
+ )
+
+ st.header("Technology used")
+ st.markdown(
+     """
+ In this demo, we used BERT as the model for sentiment analysis. BERT is a transformer-based model that was proposed by Google in 2018.
+ It is a pre-trained model that can be used for various NLP tasks such as sentiment analysis, question answering, etc.
+ """
+ )
+
+
streamlit_app.py/pages/Homepage.py ADDED
@@ -0,0 +1,43 @@
+ import streamlit as st
+ from st_pages import Page, show_pages
+
+ st.set_page_config(page_title="Sentiment Analysis", page_icon="🏠")
+
+ show_pages(
+     [
+         Page("streamlit_app.py/Homepage.py", "Home", "🏠"),
+         Page(
+             "streamlit_app.py/pages/Sentiment_Analysis.py", "Sentiment Analysis", "📝"
+         ),
+     ]
+ )
+
+ st.title("Seminar Công nghệ Tri thức - Transformer trong NLP")
+ st.markdown(
+     """
+ **Team members:**
+ | Student ID | Full Name |
+ | ---------- | ------------------------ |
+ | 19120600 | Bùi Nguyên Nghĩa |
+ | 19120607 | Phạm Thị Nguyệt |
+ """
+ )
+
+ st.header("The Need for Sentiment Analysis")
+ st.markdown(
+     """
+ Sentiment analysis algorithms are used to analyze sentiment in a comment or a review.
+ It is said that around 90% of consumers read online reviews before visiting a business or buying a product.
+ These reviews can be positive, negative, or neutral, and it is important to know what customers are saying about your business.
+ """
+ )
+
+ st.header("Technology used")
+ st.markdown(
+     """
+ In this demo, we used BERT as the model for sentiment analysis. BERT is a transformer-based model that was proposed by Google in 2018.
+ It is a pre-trained model that can be used for various NLP tasks such as sentiment analysis, question answering, etc.
+ """
+ )
+
+
streamlit_app.py/pages/Sentiment_Analysis.py ADDED
@@ -0,0 +1,117 @@
+ from os import path
+ import streamlit as st
+
+ # import pickle
+
+ # from tensorflow import keras
+ import tensorflow as tf
+ import torch
+ from torch import nn
+ from transformers import BertModel, BertTokenizer
+
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ MODEL_NAME = "bert-base-cased"
+ MODEL_PATH = path.join(path.dirname(__file__), "bert_model.h5")
+
+
+ # Build the Sentiment Classifier class
+ class SentimentClassifier(nn.Module):
+     # Constructor
+     def __init__(self, n_classes):
+         super(SentimentClassifier, self).__init__()
+         self.bert = BertModel.from_pretrained(MODEL_NAME)
+         self.drop = nn.Dropout(p=0.3)
+         self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
+
+     # Forward propagation
+     def forward(self, input_ids, attention_mask):
+         _, pooled_output = self.bert(
+             input_ids=input_ids, attention_mask=attention_mask, return_dict=False
+         )
+         # Apply dropout to the pooled output
+         output = self.drop(pooled_output)
+         return self.out(output)
+
+
+ @st.cache_resource
+ def load_model_and_tokenizer():
+     model = SentimentClassifier(3).to(device)
+     model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
+     model.eval()
+     return model, BertTokenizer.from_pretrained(MODEL_NAME)
+
+
+ def predict(content):
+     model, tokenizer = load_model_and_tokenizer()
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     encoded_review = tokenizer.encode_plus(
+         content,
+         max_length=160,
+         add_special_tokens=True,
+         return_token_type_ids=False,
+         pad_to_max_length=True,
+         return_attention_mask=True,
+         return_tensors="pt",
+     )
+
+     input_ids = encoded_review["input_ids"].to(device)
+     attention_mask = encoded_review["attention_mask"].to(device)
+
+     output = model(input_ids, attention_mask)
+     _, prediction = torch.max(output, dim=1)
+
+     class_names = ["negative", "neutral", "positive"]
+
+     return class_names[prediction]
+
+
+ def main():
+     st.set_page_config(page_title="Sentiment Analysis", page_icon="📝")
+
+     # Give the page a title
+     st.title("Sentiment analysis")
+     contents = st.text_area(
+         "Please enter reviews/sentiments/sentences/contents:",
+         placeholder="Enter your text here",
+         height=200,
+     )
+
+     prediction = ""
+
+     # Create a prediction button
+     if st.button("Analyze Sentiment"):
+         stripped = contents.strip()
+         if not stripped:
+             st.error("Please enter some text.")
+             return
+
+         prediction = predict(contents)
+         if prediction == "positive":
+             st.success("This is positive 😄")
+         elif prediction == "negative":
+             st.error("This is negative 😟")
+         else:
+             st.warning("This is neutral 🙂")
+
+     upload_file = st.file_uploader("Or upload a file", type=["txt"])
+     if upload_file is not None:
+         contents = upload_file.read().decode("utf-8")
+
+         for line in contents.splitlines():
+             line = line.strip()
+             if not line:
+                 continue
+
+             prediction = predict(line)
+             if prediction == "positive":
+                 st.success(line + "\n\nThis is positive 😄")
+             elif prediction == "negative":
+                 st.error(line + "\n\nThis is negative 😟")
+             else:
+                 st.warning(line + "\n\nThis is neutral 🙂")
+
+
+ if __name__ == "__main__":
+     main()