arssite committed on
Commit
aa2b6f4
1 Parent(s): f187a45

Update SentimentAnalyzerUsingDistilbert.py

Browse files
Files changed (1) hide show
  1. SentimentAnalyzerUsingDistilbert.py +99 -99
SentimentAnalyzerUsingDistilbert.py CHANGED
@@ -1,99 +1,99 @@
1
- import torch
2
- import gradio as gr
3
- import pandas as pd
4
- import matplotlib.pyplot as plt
5
- from transformers import pipeline
6
- from docx import Document
7
- from PyPDF2 import PdfReader
8
-
9
- # Initialize the sentiment analysis pipeline
10
- analyzer = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
11
-
12
- # Function to analyze sentiment for a single sentence
13
- def sentiment_analyzer(review):
14
- sentiment = analyzer(review)
15
- return sentiment[0]['label']
16
-
17
- # Create a pie chart visualization for the sentiment
18
- def sentiment_pie_chart(sentiment_labels):
19
- sentiment_counts = pd.Series(sentiment_labels).value_counts()
20
-
21
- fig, ax = plt.subplots()
22
- sentiment_counts.plot(kind='pie', autopct='%1.1f%%', colors=['green', 'red'], ax=ax)
23
- ax.set_ylabel('')
24
- ax.set_title('Sentiment Distribution')
25
-
26
- return fig
27
-
28
- # Function to analyze a single input sentence
29
- def analyze_single_sentence(sentence):
30
- sentiment = sentiment_analyzer(sentence)
31
- return f"The sentiment of the sentence is: {sentiment}"
32
-
33
- # Function to read and analyze reviews from Excel, PDF, or DOCX files
34
- def read_reviews_and_analyze_sentiment(file_object):
35
- if file_object.name.endswith('.xlsx'):
36
- # Load the Excel file into a DataFrame
37
- df = pd.read_excel(file_object)
38
- if 'Reviews' not in df.columns:
39
- raise ValueError("Excel file must contain a 'Reviews' column.")
40
- reviews = df['Reviews'].tolist()
41
-
42
- elif file_object.name.endswith('.docx'):
43
- # Read the content of the DOCX file
44
- doc = Document(file_object)
45
- reviews = [para.text for para in doc.paragraphs if para.text.strip()]
46
-
47
- elif file_object.name.endswith('.pdf'):
48
- # Read the content of the PDF file
49
- reader = PdfReader(file_object)
50
- text = ""
51
- for page in reader.pages:
52
- text += page.extract_text()
53
- reviews = text.split('\n') # Assuming reviews are newline-separated
54
-
55
- else:
56
- raise ValueError("Unsupported file format. Please upload .xlsx, .pdf, or .docx files.")
57
-
58
- # Analyze the sentiment of each review
59
- sentiments = [sentiment_analyzer(review) for review in reviews]
60
- df = pd.DataFrame({'Reviews': reviews, 'Sentiment': sentiments})
61
-
62
- # Generate pie chart
63
- chart_object = sentiment_pie_chart(sentiments)
64
-
65
- return df, chart_object
66
-
67
- # Gradio interface combining single sentence analysis and file-based review sentiment analysis
68
- def main_interface(input_option, sentence=None, file=None):
69
- if input_option == "Single Sentence":
70
- if sentence:
71
- result = analyze_single_sentence(sentence)
72
- return None, None, result # Single sentence output
73
- else:
74
- return None, None, "Please enter a sentence."
75
- elif input_option == "File Upload":
76
- if file:
77
- df, chart_object = read_reviews_and_analyze_sentiment(file)
78
- return df, chart_object, None # File output
79
- else:
80
- return None, None, "Please upload a file."
81
-
82
- # Gradio interface
83
- demo = gr.Interface(
84
- fn=main_interface,
85
- inputs=[
86
- gr.Radio(label="Choose Input Type", choices=["Single Sentence", "File Upload"], value="Single Sentence"),
87
- gr.Textbox(label="Enter a sentence for sentiment analysis (if selected)", placeholder="Type your sentence here..."),
88
- gr.File(file_types=["xlsx", "pdf", "docx"], label="Upload your review comment file (if selected)")
89
- ],
90
- outputs=[
91
- gr.Dataframe(label="Sentiment Analysis Results (For File Uploads)"),
92
- gr.Plot(label="Sentiment Distribution Chart (For File Uploads)"),
93
- gr.Textbox(label="Single Sentence Sentiment Result (For Single Sentence Input)")
94
- ],
95
- title="@GenAILearniverse Project 3: Sentiment Analyzer",
96
- description="This application analyzes the sentiment of either a single sentence or reviews in uploaded files (Excel, PDF, DOCX)."
97
- )
98
-
99
- demo.launch()
 
1
+ import torch
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ from transformers import pipeline
6
+ from docx import Document
7
+ from PyPDF2 import PdfReader
8
+
9
# Build the shared text-classification pipeline once, at import time, so every
# request reuses the same loaded model.
_SENTIMENT_CHECKPOINT = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
analyzer = pipeline(
    task="text-classification",
    model=_SENTIMENT_CHECKPOINT,
)
11
+
12
def sentiment_analyzer(review):
    """Classify one piece of text and return the predicted label.

    Args:
        review: The text to classify with the module-level ``analyzer``
            pipeline.

    Returns:
        The ``'label'`` entry of the first prediction returned by the
        pipeline.
    """
    # truncation=True asks the tokenizer to cut over-long inputs down to the
    # model's maximum sequence length; without it a long review makes the
    # pipeline raise instead of returning a prediction.
    predictions = analyzer(review, truncation=True)
    return predictions[0]['label']
16
+
17
def sentiment_pie_chart(sentiment_labels):
    """Draw a pie chart of how often each sentiment label occurs.

    Args:
        sentiment_labels: Iterable of label strings, one per analysed review.

    Returns:
        The matplotlib figure holding the chart. When no labels are given the
        figure contains a short placeholder message instead of a pie.
    """
    sentiment_counts = pd.Series(list(sentiment_labels), dtype="object").value_counts()

    # value_counts() orders slices by frequency, so colours must be looked up
    # by label; a fixed ['green', 'red'] list would paint NEGATIVE green
    # whenever it is the majority class.
    # NOTE(review): assumes the model emits POSITIVE / NEGATIVE labels; any
    # other label falls back to gray.
    palette = {'POSITIVE': 'green', 'NEGATIVE': 'red'}
    slice_colors = [palette.get(str(label).upper(), 'gray') for label in sentiment_counts.index]

    fig, ax = plt.subplots()
    if sentiment_counts.empty:
        # Nothing to plot: show a message rather than asking pandas to draw
        # a pie with no data.
        ax.text(0.5, 0.5, 'No reviews to display', ha='center', va='center')
        ax.set_xticks([])
        ax.set_yticks([])
    else:
        sentiment_counts.plot(kind='pie', autopct='%1.1f%%', colors=slice_colors, ax=ax)
    ax.set_ylabel('')
    ax.set_title('Sentiment Distribution')

    return fig
27
+
28
def analyze_single_sentence(sentence):
    """Return a human-readable message stating the sentiment of *sentence*.

    The label comes from ``sentiment_analyzer``; this wrapper only formats it
    for display.
    """
    label = sentiment_analyzer(sentence)
    message = "The sentiment of the sentence is: {}".format(label)
    return message
32
+
33
def read_reviews_and_analyze_sentiment(file_object):
    """Extract reviews from an uploaded file and classify each one.

    Supported inputs are Excel workbooks with a 'Reviews' column, DOCX files
    (one review per non-empty paragraph) and PDF files (one review per
    non-empty line of extracted text).

    Args:
        file_object: The uploaded file. Its ``name`` attribute (or the object
            itself when it has none) is used to determine the file type, and
            the object is handed to the matching reader.

    Returns:
        A ``(df, chart_object)`` tuple: a DataFrame with 'Reviews' and
        'Sentiment' columns, and the figure from ``sentiment_pie_chart``.

    Raises:
        ValueError: If the extension is unsupported, the Excel file has no
            'Reviews' column, or no review text is found in the file.
    """
    # Compare extensions case-insensitively so 'REVIEWS.XLSX' is accepted too.
    file_name = str(getattr(file_object, 'name', file_object)).lower()

    if file_name.endswith('.xlsx'):
        sheet = pd.read_excel(file_object)
        if 'Reviews' not in sheet.columns:
            raise ValueError("Excel file must contain a 'Reviews' column.")
        # Empty cells load as NaN; drop them so they are not sent to the model.
        raw_reviews = sheet['Reviews'].dropna().astype(str).tolist()

    elif file_name.endswith('.docx'):
        doc = Document(file_object)
        raw_reviews = [para.text for para in doc.paragraphs]

    elif file_name.endswith('.pdf'):
        reader = PdfReader(file_object)
        # Join pages with a newline so the last line of one page does not
        # merge with the first line of the next; `or ""` guards against a
        # page that yields no text.
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
        raw_reviews = text.split('\n')  # Assuming reviews are newline-separated

    else:
        raise ValueError("Unsupported file format. Please upload .xlsx, .pdf, or .docx files.")

    # Blank entries carry no sentiment and would only skew the distribution.
    reviews = [entry.strip() for entry in raw_reviews if entry and entry.strip()]
    if not reviews:
        raise ValueError("No reviews were found in the uploaded file.")

    # Analyze the sentiment of each review
    sentiments = [sentiment_analyzer(review) for review in reviews]
    df = pd.DataFrame({'Reviews': reviews, 'Sentiment': sentiments})

    # Generate pie chart
    chart_object = sentiment_pie_chart(sentiments)

    return df, chart_object
66
+
67
def main_interface(input_option, sentence=None, file=None):
    """Route a request to single-sentence or file-based analysis.

    Args:
        input_option: "Single Sentence" or "File Upload".
        sentence: Text to analyse when the single-sentence mode is chosen.
        file: Uploaded file to analyse when the file mode is chosen.

    Returns:
        A 3-tuple ``(dataframe, chart, message)``. File analysis fills the
        first two slots and leaves the message as ``None``; single-sentence
        analysis and every user-facing error fill only the message slot.
    """
    if input_option == "Single Sentence":
        # Treat whitespace-only input like an empty box.
        if sentence and sentence.strip():
            return None, None, analyze_single_sentence(sentence)
        return None, None, "Please enter a sentence."

    if input_option == "File Upload":
        if not file:
            return None, None, "Please upload a file."
        try:
            df, chart_object = read_reviews_and_analyze_sentiment(file)
        except ValueError as err:
            # Show validation problems (bad format, missing column, ...) in
            # the message box instead of failing the whole request.
            return None, None, str(err)
        return df, chart_object, None  # File output

    # Always return three values: falling through with a bare None would not
    # match the three declared outputs.
    return None, None, "Please choose an input type."
81
+
82
# Gradio interface: one radio button selects the mode; the textbox and file
# inputs feed main_interface, whose three return values map onto the three
# outputs below.
demo = gr.Interface(
    fn=main_interface,
    inputs=[
        gr.Radio(label="Choose Input Type", choices=["Single Sentence", "File Upload"], value="Single Sentence"),
        gr.Textbox(label="Enter a sentence for sentiment analysis (if selected)", placeholder="Type your sentence here..."),
        # Extensions need a leading dot; bare names such as "xlsx" are read by
        # Gradio as a file category rather than an extension.
        gr.File(file_types=[".xlsx", ".pdf", ".docx"], label="Upload your review comment file (if selected)")
    ],
    outputs=[
        gr.Dataframe(label="Sentiment Analysis Results (For File Uploads)"),
        gr.Plot(label="Sentiment Distribution Chart (For File Uploads)"),
        gr.Textbox(label="Single Sentence Sentiment Result (For Single Sentence Input)")
    ],
    title="Sentiment Analyzer",
    description="This application analyzes the sentiment of either a single sentence or reviews in uploaded files (Excel, PDF, DOCX)."
)

demo.launch()