Sentiment_Analyzer_Using_Distilbert / SentimentAnalyzerUsingDistilbert.py
arssite's picture
Update SentimentAnalyzerUsingDistilbert.py
aa2b6f4 verified
raw
history blame
3.92 kB
import torch
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from transformers import pipeline
from docx import Document
from PyPDF2 import PdfReader
# Build the shared text-classification pipeline once, when the module is loaded;
# every analysis function below reuses this single instance.
analyzer = pipeline(
    task="text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)
# Function to analyze sentiment for a single sentence
def sentiment_analyzer(review):
    """Classify one piece of text and return its predicted label.

    Args:
        review: The text to classify.

    Returns:
        The 'label' field of the first prediction returned by the
        module-level ``analyzer`` pipeline (presumably 'POSITIVE' or
        'NEGATIVE' for this SST-2 checkpoint -- confirm against the model card).
    """
    # truncation=True is forwarded to the tokenizer so that text longer than
    # the model's maximum sequence length is cut down instead of raising.
    sentiment = analyzer(review, truncation=True)
    return sentiment[0]['label']
# Create a pie chart visualization for the sentiment
def sentiment_pie_chart(sentiment_labels):
    """Build a pie chart showing how often each sentiment label occurs.

    Args:
        sentiment_labels: Iterable of label strings, one per analyzed review.

    Returns:
        The matplotlib Figure containing the chart. When no labels are given,
        the figure shows a "No data" note instead of a pie.
    """
    # Colours are looked up per label. value_counts() orders slices by
    # frequency, so a positional colour list would paint whichever label is
    # most common green, regardless of what it means.
    # NOTE(review): keys assume the model emits 'POSITIVE' / 'NEGATIVE';
    # any other label falls back to gray.
    label_colors = {'POSITIVE': 'green', 'NEGATIVE': 'red'}

    sentiment_counts = pd.Series(list(sentiment_labels), dtype=object).value_counts()
    fig, ax = plt.subplots()
    if sentiment_counts.empty:
        # Nothing to plot: show a placeholder rather than an empty pie.
        ax.text(0.5, 0.5, 'No data', ha='center', va='center')
        ax.axis('off')
    else:
        slice_colors = [
            label_colors.get(str(label).upper(), 'gray')
            for label in sentiment_counts.index
        ]
        sentiment_counts.plot(kind='pie', autopct='%1.1f%%', colors=slice_colors, ax=ax)
        ax.set_ylabel('')
    ax.set_title('Sentiment Distribution')
    return fig
# Report the sentiment of one sentence as a display-ready message
def analyze_single_sentence(sentence):
    """Return a human-readable line stating the sentiment label of *sentence*.

    The label itself comes from ``sentiment_analyzer``; this function only
    wraps it in the message shown to the user.
    """
    return f"The sentiment of the sentence is: {sentiment_analyzer(sentence)}"
# Function to read and analyze reviews from Excel, PDF, or DOCX files
def _load_review_texts(source, filename):
    """Extract the non-blank review strings from one uploaded document.

    Args:
        source: What the reader libraries open -- a path or a file-like object.
        filename: Name used only to pick the parser from its extension.

    Returns:
        A list of review strings with blank entries removed.

    Raises:
        ValueError: If the extension is unsupported, or an Excel file has no
            'Reviews' column.
    """
    lowered = filename.lower()  # accept ".XLSX", ".Pdf", ...
    if lowered.endswith('.xlsx'):
        sheet = pd.read_excel(source)
        if 'Reviews' not in sheet.columns:
            raise ValueError("Excel file must contain a 'Reviews' column.")
        # Empty cells load as NaN; drop them and stringify the rest so the
        # classifier only ever receives text.
        candidates = sheet['Reviews'].dropna().astype(str).tolist()
    elif lowered.endswith('.docx'):
        candidates = [para.text for para in Document(source).paragraphs]
    elif lowered.endswith('.pdf'):
        reader = PdfReader(source)
        # "or ''" guards against a page yielding no text object; joining with
        # a newline keeps the last line of one page from fusing with the
        # first line of the next.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        candidates = "\n".join(page_texts).split('\n')  # Assuming reviews are newline-separated
    else:
        raise ValueError("Unsupported file format. Please upload .xlsx, .pdf, or .docx files.")
    # Same blank-entry filter for every format.
    return [text for text in candidates if text.strip()]


def read_reviews_and_analyze_sentiment(file_object):
    """Read reviews from an uploaded file and classify each one.

    Args:
        file_object: Either an object exposing the file path as ``.name``
            (as the original code required) or a plain path string.

    Returns:
        A tuple ``(df, chart_object)``: a DataFrame with 'Reviews' and
        'Sentiment' columns, and the figure returned by
        ``sentiment_pie_chart`` for those sentiments.

    Raises:
        ValueError: If the format is unsupported, the Excel file lacks a
            'Reviews' column, or the file contains no reviews.
    """
    filename = str(getattr(file_object, "name", file_object))
    # Readable file objects are handed to the parsers unchanged (original
    # behaviour); anything else is opened by path.
    source = file_object if hasattr(file_object, "read") else filename

    reviews = _load_review_texts(source, filename)
    if not reviews:
        raise ValueError("No reviews were found in the uploaded file.")

    # Analyze the sentiment of each review
    sentiments = [sentiment_analyzer(review) for review in reviews]
    df = pd.DataFrame({'Reviews': reviews, 'Sentiment': sentiments})
    # Generate pie chart
    chart_object = sentiment_pie_chart(sentiments)
    return df, chart_object
# Gradio interface combining single sentence analysis and file-based review sentiment analysis
def main_interface(input_option, sentence=None, file=None):
    """Dispatch a request to single-sentence or file-based analysis.

    Args:
        input_option: "Single Sentence" or "File Upload".
        sentence: Text to classify when "Single Sentence" is selected.
        file: Uploaded file to analyze when "File Upload" is selected.

    Returns:
        A 3-tuple ``(dataframe, chart, message)``. File analysis fills the
        first two slots and leaves the message as None. Single-sentence
        analysis and all user-facing problems fill only the message.
    """
    if input_option == "Single Sentence":
        # Treat whitespace-only text the same as no text at all.
        if not sentence or not sentence.strip():
            return None, None, "Please enter a sentence."
        return None, None, analyze_single_sentence(sentence)  # Single sentence output

    if input_option == "File Upload":
        if not file:
            return None, None, "Please upload a file."
        try:
            df, chart_object = read_reviews_and_analyze_sentiment(file)
        except ValueError as err:
            # Validation problems (bad extension, missing column, ...) are
            # shown in the message box, like the other prompts here.
            return None, None, str(err)
        return df, chart_object, None  # File output

    # Always return three values, even for an unexpected or empty option.
    return None, None, "Please choose an input type."
# Gradio interface: one form with three inputs (mode selector, sentence box,
# file upload) wired to main_interface, whose 3-tuple result fills the three
# outputs in order (table, chart, message).
demo = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.Radio(label="Choose Input Type", choices=["Single Sentence", "File Upload"], value="Single Sentence"),
        gr.Textbox(label="Enter a sentence for sentiment analysis (if selected)", placeholder="Type your sentence here..."),
        # Extensions need the leading dot; bare words such as "xlsx" are
        # treated by Gradio as file-type categories, not extensions.
        gr.File(file_types=[".xlsx", ".pdf", ".docx"], label="Upload your review comment file (if selected)"),
    ],
    outputs=[
        gr.Dataframe(label="Sentiment Analysis Results (For File Uploads)"),
        gr.Plot(label="Sentiment Distribution Chart (For File Uploads)"),
        gr.Textbox(label="Single Sentence Sentiment Result (For Single Sentence Input)"),
    ],
    title="Sentiment Analyzer",
    description="This application analyzes the sentiment of either a single sentence or reviews in uploaded files (Excel, PDF, DOCX).",
)
demo.launch()