Spaces:

arssite
/

Sentiment_Analyzer_Using_Distilbert

Sleeping

File size: 3,916 Bytes

aa2b6f4

import torch
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from transformers import pipeline
from docx import Document
from PyPDF2 import PdfReader

# Build the text-classification pipeline once at import time so that every
# call to the analysis functions reuses the same loaded model.
analyzer = pipeline(
    task="text-classification",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

# Function to analyze sentiment for a single sentence
def sentiment_analyzer(review):
    """Classify one piece of text and return its predicted label.

    Args:
        review: The text to classify.

    Returns:
        The ``'label'`` entry of the first prediction produced by the
        module-level ``analyzer`` pipeline.
    """
    # truncation=True is forwarded to the tokenizer so that a review longer
    # than the model's maximum sequence length is cut to fit instead of
    # making the forward pass fail.
    sentiment = analyzer(review, truncation=True)
    return sentiment[0]['label']

# Create a pie chart visualization for the sentiment
def sentiment_pie_chart(sentiment_labels):
    """Build a pie chart of how often each sentiment label occurs.

    Args:
        sentiment_labels: Sequence of label strings, one per analyzed review.

    Returns:
        The matplotlib figure holding the chart.
    """
    # Colors are keyed by label rather than by position: value_counts() orders
    # slices by frequency, so a fixed ['green', 'red'] list would paint the
    # majority class green even when that class is the negative one.
    # NOTE(review): assumes the model emits 'POSITIVE' / 'NEGATIVE' labels;
    # any other label falls back to gray.
    label_colors = {'POSITIVE': 'green', 'NEGATIVE': 'red'}
    sentiment_counts = pd.Series(sentiment_labels).value_counts()

    fig, ax = plt.subplots()
    if sentiment_counts.empty:
        # Nothing to plot; show a placeholder instead of letting the pandas
        # plotting call fail on empty data.
        ax.text(0.5, 0.5, 'No data', ha='center', va='center')
        ax.axis('off')
    else:
        slice_colors = [
            label_colors.get(str(label).upper(), 'gray')
            for label in sentiment_counts.index
        ]
        sentiment_counts.plot(kind='pie', autopct='%1.1f%%', colors=slice_colors, ax=ax)
    ax.set_ylabel('')
    ax.set_title('Sentiment Distribution')

    return fig

# Turn the predicted label of one sentence into a display message
def analyze_single_sentence(sentence):
    """Return a one-line message stating the sentiment label of ``sentence``."""
    label = sentiment_analyzer(sentence)
    message = f"The sentiment of the sentence is: {label}"
    return message

# Helper: pull the non-blank review texts out of an uploaded file
def _load_reviews(file_object):
    """Extract review texts from an .xlsx, .docx or .pdf upload.

    Args:
        file_object: Either a path string or an object exposing the file
            path/name through a ``name`` attribute.

    Returns:
        A list of review strings with blank entries removed.

    Raises:
        ValueError: If the extension is unsupported, or an Excel file has no
            'Reviews' column.
    """
    # A plain string has no ``name`` attribute, so fall back to the object
    # itself; lower-casing lets '.XLSX' / '.Pdf' style names match too.
    file_name = str(getattr(file_object, 'name', file_object)).lower()

    if file_name.endswith('.xlsx'):
        df = pd.read_excel(file_object)
        if 'Reviews' not in df.columns:
            raise ValueError("Excel file must contain a 'Reviews' column.")
        # Empty cells load as NaN (a float) and numeric cells as numbers;
        # drop the former and stringify the rest so only text reaches the model.
        candidates = df['Reviews'].dropna().astype(str).tolist()

    elif file_name.endswith('.docx'):
        doc = Document(file_object)
        candidates = [para.text for para in doc.paragraphs]

    elif file_name.endswith('.pdf'):
        reader = PdfReader(file_object)
        # ``or ""`` guards against a page yielding no text; joining with a
        # newline keeps the last line of one page from fusing with the first
        # line of the next.
        page_texts = [page.extract_text() or "" for page in reader.pages]
        candidates = "\n".join(page_texts).split('\n')  # Assuming reviews are newline-separated

    else:
        raise ValueError("Unsupported file format. Please upload .xlsx, .pdf, or .docx files.")

    # Blank lines/paragraphs/cells are not reviews; skip them for every format.
    return [text for text in candidates if text.strip()]


# Function to read and analyze reviews from Excel, PDF, or DOCX files
def read_reviews_and_analyze_sentiment(file_object):
    """Read reviews from an uploaded file and classify each one.

    Args:
        file_object: Either a path string or an object exposing the file
            path/name through a ``name`` attribute. Supported extensions are
            .xlsx (needs a 'Reviews' column), .docx (one review per paragraph)
            and .pdf (one review per line).

    Returns:
        A tuple ``(df, chart_object)``: a DataFrame with 'Reviews' and
        'Sentiment' columns, and the pie-chart figure of the label counts.

    Raises:
        ValueError: If the format is unsupported, the Excel file lacks a
            'Reviews' column, or the file contains no review text.
    """
    reviews = _load_reviews(file_object)
    if not reviews:
        # Fail with a clear message rather than charting an empty result.
        raise ValueError("No reviews were found in the uploaded file.")

    # Analyze the sentiment of each review
    sentiments = [sentiment_analyzer(review) for review in reviews]
    df = pd.DataFrame({'Reviews': reviews, 'Sentiment': sentiments})

    # Generate pie chart
    chart_object = sentiment_pie_chart(sentiments)

    return df, chart_object

# Gradio interface combining single sentence analysis and file-based review sentiment analysis
def main_interface(input_option, sentence=None, file=None):
    """Dispatch a request to single-sentence or file-based analysis.

    Args:
        input_option: "Single Sentence" or "File Upload".
        sentence: Text to analyze when the single-sentence option is chosen.
        file: Uploaded file to analyze when the file option is chosen.

    Returns:
        A 3-tuple ``(dataframe, chart, message)``. File analysis fills the
        first two slots and leaves the message ``None``; every other outcome
        (single-sentence result, missing input, invalid file, unknown option)
        fills only the message slot.
    """
    if input_option == "Single Sentence":
        # Treat whitespace-only text the same as no text at all.
        if not sentence or not sentence.strip():
            return None, None, "Please enter a sentence."
        return None, None, analyze_single_sentence(sentence)  # Single sentence output

    if input_option == "File Upload":
        if not file:
            return None, None, "Please upload a file."
        try:
            df, chart_object = read_reviews_and_analyze_sentiment(file)
        except ValueError as exc:
            # File validation problems carry user-facing messages; show them
            # in the message slot like the other input errors.
            return None, None, str(exc)
        return df, chart_object, None  # File output

    # Always hand back three values so the three output components can be
    # filled even when the option is missing or unrecognized.
    return None, None, "Please choose an input type."

# Gradio interface: three inputs (mode selector, sentence box, file upload)
# mapped onto the three values returned by main_interface.
demo = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.Radio(label="Choose Input Type", choices=["Single Sentence", "File Upload"], value="Single Sentence"),
        gr.Textbox(label="Enter a sentence for sentiment analysis (if selected)", placeholder="Type your sentence here..."),
        # Extensions need the leading dot; a bare word such as "pdf" is read
        # as a file category rather than an extension.
        gr.File(file_types=[".xlsx", ".pdf", ".docx"], label="Upload your review comment file (if selected)")
    ],
    outputs=[
        gr.Dataframe(label="Sentiment Analysis Results (For File Uploads)"),
        gr.Plot(label="Sentiment Distribution Chart (For File Uploads)"),
        gr.Textbox(label="Single Sentence Sentiment Result (For Single Sentence Input)")
    ],
    title="Sentiment Analyzer",
    description="This application analyzes the sentiment of either a single sentence or reviews in uploaded files (Excel, PDF, DOCX)."
)

# Start the web app when the module is run.
demo.launch()