Update SentimentAnalyzerUsingDistilbert.py
Browse files
SentimentAnalyzerUsingDistilbert.py
CHANGED
@@ -1,99 +1,99 @@
|
|
1 |
-
import torch
|
2 |
-
import gradio as gr
|
3 |
-
import pandas as pd
|
4 |
-
import matplotlib.pyplot as plt
|
5 |
-
from transformers import pipeline
|
6 |
-
from docx import Document
|
7 |
-
from PyPDF2 import PdfReader
|
8 |
-
|
9 |
-
# Initialize the sentiment analysis pipeline
|
10 |
-
# Built once at module import so every request reuses the same loaded model.
analyzer = pipeline(
    task="text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
|
11 |
-
|
12 |
-
# Function to analyze sentiment for a single sentence
|
13 |
-
def sentiment_analyzer(review):
    """Classify one piece of text and return the predicted label.

    Args:
        review: Text to pass to the module-level ``analyzer`` pipeline.

    Returns:
        The ``'label'`` value of the first prediction the pipeline returns.
    """
    # truncation=True lets the tokenizer cut over-long input down to the
    # model's maximum sequence length instead of failing on long reviews.
    sentiment = analyzer(review, truncation=True)
    return sentiment[0]['label']
|
16 |
-
|
17 |
-
# Create a pie chart visualization for the sentiment
|
18 |
-
def sentiment_pie_chart(sentiment_labels):
    """Build a pie chart showing how often each sentiment label occurs.

    Args:
        sentiment_labels: Sequence of label values, one per analyzed review.

    Returns:
        The matplotlib figure that holds the chart.
    """
    sentiment_counts = pd.Series(sentiment_labels, dtype=object).value_counts()

    fig, ax = plt.subplots()
    if sentiment_counts.empty:
        # Nothing to count: show a placeholder rather than failing in plot().
        ax.text(0.5, 0.5, 'No reviews to display', ha='center', va='center')
        ax.axis('off')
    else:
        # value_counts() orders labels by frequency, so a fixed colour list
        # would paint whichever label is most common green. Choose the colour
        # from the label itself; unknown labels fall back to gray.
        palette = {'POSITIVE': 'green', 'NEGATIVE': 'red'}
        colors = [
            palette.get(str(label).upper(), 'gray')
            for label in sentiment_counts.index
        ]
        sentiment_counts.plot(kind='pie', autopct='%1.1f%%', colors=colors, ax=ax)
        ax.set_ylabel('')
    ax.set_title('Sentiment Distribution')

    return fig
|
27 |
-
|
28 |
-
# Function to analyze a single input sentence
|
29 |
-
def analyze_single_sentence(sentence):
    """Return a one-line message reporting the sentiment label of ``sentence``."""
    return f"The sentiment of the sentence is: {sentiment_analyzer(sentence)}"
|
32 |
-
|
33 |
-
# Function to read and analyze reviews from Excel, PDF, or DOCX files
|
34 |
-
def read_reviews_and_analyze_sentiment(file_object):
    """Read reviews from an uploaded file and classify each one.

    Args:
        file_object: The uploaded file, either an object exposing its path
            as ``.name`` or the path itself as a string.

    Returns:
        A ``(df, chart_object)`` tuple: a DataFrame with ``'Reviews'`` and
        ``'Sentiment'`` columns, and the figure from ``sentiment_pie_chart``.

    Raises:
        ValueError: If the extension is not .xlsx, .docx or .pdf, if an Excel
            file has no 'Reviews' column, or if no review text is found.
    """
    # Only the name is needed to pick a reader; fall back to the object
    # itself so a plain path string works as well as an object with .name.
    file_name = str(getattr(file_object, 'name', file_object)).lower()

    if file_name.endswith('.xlsx'):
        # Load the Excel file into a DataFrame
        df = pd.read_excel(file_object)
        if 'Reviews' not in df.columns:
            raise ValueError("Excel file must contain a 'Reviews' column.")
        # Empty cells arrive as NaN, which the classifier cannot tokenize.
        raw_reviews = df['Reviews'].dropna().tolist()

    elif file_name.endswith('.docx'):
        # One review per paragraph
        doc = Document(file_object)
        raw_reviews = [para.text for para in doc.paragraphs]

    elif file_name.endswith('.pdf'):
        reader = PdfReader(file_object)
        # Join pages with a newline so the last line of one page is not fused
        # with the first line of the next; `or ""` guards a page that yields
        # no text.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        raw_reviews = "\n".join(page_texts).split('\n')  # Assuming reviews are newline-separated

    else:
        raise ValueError("Unsupported file format. Please upload .xlsx, .pdf, or .docx files.")

    # Normalize to non-empty strings so blank lines and non-text cells are
    # never sent to the classifier.
    reviews = [text for text in (str(item).strip() for item in raw_reviews) if text]
    if not reviews:
        raise ValueError("No review text was found in the uploaded file.")

    # Analyze the sentiment of each review
    sentiments = [sentiment_analyzer(review) for review in reviews]
    df = pd.DataFrame({'Reviews': reviews, 'Sentiment': sentiments})

    # Generate pie chart
    chart_object = sentiment_pie_chart(sentiments)

    return df, chart_object
|
66 |
-
|
67 |
-
# Gradio interface combining single sentence analysis and file-based review sentiment analysis
|
68 |
-
def main_interface(input_option, sentence=None, file=None):
    """Route a request to single-sentence or file-based sentiment analysis.

    Args:
        input_option: "Single Sentence" or "File Upload".
        sentence: Text to classify when "Single Sentence" is chosen.
        file: Uploaded file to process when "File Upload" is chosen.

    Returns:
        A 3-tuple ``(dataframe, chart, message)``. File analysis fills the
        first two slots; sentence analysis and all user-facing prompts or
        validation errors fill the third.
    """
    if input_option == "Single Sentence":
        # Whitespace-only input carries nothing to classify.
        if sentence and str(sentence).strip():
            return None, None, analyze_single_sentence(sentence)
        return None, None, "Please enter a sentence."

    if input_option == "File Upload":
        if not file:
            return None, None, "Please upload a file."
        try:
            df, chart_object = read_reviews_and_analyze_sentiment(file)
        except ValueError as exc:
            # Validation failures (bad extension, missing column) carry a
            # readable message; show it in the text output.
            return None, None, str(exc)
        return df, chart_object, None

    # Always return three values so the result matches the three outputs,
    # even for an unexpected option.
    return None, None, "Please choose a valid input type."
|
81 |
-
|
82 |
-
# Gradio interface
|
83 |
-
# The three inputs map positionally onto main_interface(input_option, sentence,
# file); the three outputs map onto its (dataframe, chart, message) return.
demo = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.Radio(label="Choose Input Type", choices=["Single Sentence", "File Upload"], value="Single Sentence"),
        gr.Textbox(label="Enter a sentence for sentiment analysis (if selected)", placeholder="Type your sentence here..."),
        # File extensions are written with a leading dot, as in Gradio's
        # documented examples for file_types.
        gr.File(file_types=[".xlsx", ".pdf", ".docx"], label="Upload your review comment file (if selected)"),
    ],
    outputs=[
        gr.Dataframe(label="Sentiment Analysis Results (For File Uploads)"),
        gr.Plot(label="Sentiment Distribution Chart (For File Uploads)"),
        gr.Textbox(label="Single Sentence Sentiment Result (For Single Sentence Input)"),
    ],
    title="Sentiment Analyzer",
    description="This application analyzes the sentiment of either a single sentence or reviews in uploaded files (Excel, PDF, DOCX).",
)

demo.launch()
|
|
|
1 |
+
import torch
|
2 |
+
import gradio as gr
|
3 |
+
import pandas as pd
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
from transformers import pipeline
|
6 |
+
from docx import Document
|
7 |
+
from PyPDF2 import PdfReader
|
8 |
+
|
9 |
+
# Initialize the sentiment analysis pipeline
|
10 |
+
# Built once at module import so every request reuses the same loaded model.
analyzer = pipeline(
    task="text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
|
11 |
+
|
12 |
+
# Function to analyze sentiment for a single sentence
|
13 |
+
def sentiment_analyzer(review):
    """Classify one piece of text and return the predicted label.

    Args:
        review: Text to pass to the module-level ``analyzer`` pipeline.

    Returns:
        The ``'label'`` value of the first prediction the pipeline returns.
    """
    # truncation=True lets the tokenizer cut over-long input down to the
    # model's maximum sequence length instead of failing on long reviews.
    sentiment = analyzer(review, truncation=True)
    return sentiment[0]['label']
|
16 |
+
|
17 |
+
# Create a pie chart visualization for the sentiment
|
18 |
+
def sentiment_pie_chart(sentiment_labels):
    """Build a pie chart showing how often each sentiment label occurs.

    Args:
        sentiment_labels: Sequence of label values, one per analyzed review.

    Returns:
        The matplotlib figure that holds the chart.
    """
    sentiment_counts = pd.Series(sentiment_labels, dtype=object).value_counts()

    fig, ax = plt.subplots()
    if sentiment_counts.empty:
        # Nothing to count: show a placeholder rather than failing in plot().
        ax.text(0.5, 0.5, 'No reviews to display', ha='center', va='center')
        ax.axis('off')
    else:
        # value_counts() orders labels by frequency, so a fixed colour list
        # would paint whichever label is most common green. Choose the colour
        # from the label itself; unknown labels fall back to gray.
        palette = {'POSITIVE': 'green', 'NEGATIVE': 'red'}
        colors = [
            palette.get(str(label).upper(), 'gray')
            for label in sentiment_counts.index
        ]
        sentiment_counts.plot(kind='pie', autopct='%1.1f%%', colors=colors, ax=ax)
        ax.set_ylabel('')
    ax.set_title('Sentiment Distribution')

    return fig
|
27 |
+
|
28 |
+
# Function to analyze a single input sentence
|
29 |
+
def analyze_single_sentence(sentence):
    """Return a one-line message reporting the sentiment label of ``sentence``."""
    return f"The sentiment of the sentence is: {sentiment_analyzer(sentence)}"
|
32 |
+
|
33 |
+
# Function to read and analyze reviews from Excel, PDF, or DOCX files
|
34 |
+
def read_reviews_and_analyze_sentiment(file_object):
    """Read reviews from an uploaded file and classify each one.

    Args:
        file_object: The uploaded file, either an object exposing its path
            as ``.name`` or the path itself as a string.

    Returns:
        A ``(df, chart_object)`` tuple: a DataFrame with ``'Reviews'`` and
        ``'Sentiment'`` columns, and the figure from ``sentiment_pie_chart``.

    Raises:
        ValueError: If the extension is not .xlsx, .docx or .pdf, if an Excel
            file has no 'Reviews' column, or if no review text is found.
    """
    # Only the name is needed to pick a reader; fall back to the object
    # itself so a plain path string works as well as an object with .name.
    file_name = str(getattr(file_object, 'name', file_object)).lower()

    if file_name.endswith('.xlsx'):
        # Load the Excel file into a DataFrame
        df = pd.read_excel(file_object)
        if 'Reviews' not in df.columns:
            raise ValueError("Excel file must contain a 'Reviews' column.")
        # Empty cells arrive as NaN, which the classifier cannot tokenize.
        raw_reviews = df['Reviews'].dropna().tolist()

    elif file_name.endswith('.docx'):
        # One review per paragraph
        doc = Document(file_object)
        raw_reviews = [para.text for para in doc.paragraphs]

    elif file_name.endswith('.pdf'):
        reader = PdfReader(file_object)
        # Join pages with a newline so the last line of one page is not fused
        # with the first line of the next; `or ""` guards a page that yields
        # no text.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        raw_reviews = "\n".join(page_texts).split('\n')  # Assuming reviews are newline-separated

    else:
        raise ValueError("Unsupported file format. Please upload .xlsx, .pdf, or .docx files.")

    # Normalize to non-empty strings so blank lines and non-text cells are
    # never sent to the classifier.
    reviews = [text for text in (str(item).strip() for item in raw_reviews) if text]
    if not reviews:
        raise ValueError("No review text was found in the uploaded file.")

    # Analyze the sentiment of each review
    sentiments = [sentiment_analyzer(review) for review in reviews]
    df = pd.DataFrame({'Reviews': reviews, 'Sentiment': sentiments})

    # Generate pie chart
    chart_object = sentiment_pie_chart(sentiments)

    return df, chart_object
|
66 |
+
|
67 |
+
# Gradio interface combining single sentence analysis and file-based review sentiment analysis
|
68 |
+
def main_interface(input_option, sentence=None, file=None):
    """Route a request to single-sentence or file-based sentiment analysis.

    Args:
        input_option: "Single Sentence" or "File Upload".
        sentence: Text to classify when "Single Sentence" is chosen.
        file: Uploaded file to process when "File Upload" is chosen.

    Returns:
        A 3-tuple ``(dataframe, chart, message)``. File analysis fills the
        first two slots; sentence analysis and all user-facing prompts or
        validation errors fill the third.
    """
    if input_option == "Single Sentence":
        # Whitespace-only input carries nothing to classify.
        if sentence and str(sentence).strip():
            return None, None, analyze_single_sentence(sentence)
        return None, None, "Please enter a sentence."

    if input_option == "File Upload":
        if not file:
            return None, None, "Please upload a file."
        try:
            df, chart_object = read_reviews_and_analyze_sentiment(file)
        except ValueError as exc:
            # Validation failures (bad extension, missing column) carry a
            # readable message; show it in the text output.
            return None, None, str(exc)
        return df, chart_object, None

    # Always return three values so the result matches the three outputs,
    # even for an unexpected option.
    return None, None, "Please choose a valid input type."
|
81 |
+
|
82 |
+
# Gradio interface
|
83 |
+
# The three inputs map positionally onto main_interface(input_option, sentence,
# file); the three outputs map onto its (dataframe, chart, message) return.
demo = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.Radio(label="Choose Input Type", choices=["Single Sentence", "File Upload"], value="Single Sentence"),
        gr.Textbox(label="Enter a sentence for sentiment analysis (if selected)", placeholder="Type your sentence here..."),
        # File extensions are written with a leading dot, as in Gradio's
        # documented examples for file_types.
        gr.File(file_types=[".xlsx", ".pdf", ".docx"], label="Upload your review comment file (if selected)"),
    ],
    outputs=[
        gr.Dataframe(label="Sentiment Analysis Results (For File Uploads)"),
        gr.Plot(label="Sentiment Distribution Chart (For File Uploads)"),
        gr.Textbox(label="Single Sentence Sentiment Result (For Single Sentence Input)"),
    ],
    title="Sentiment Analyzer",
    description="This application analyzes the sentiment of either a single sentence or reviews in uploaded files (Excel, PDF, DOCX).",
)

demo.launch()
|