Spaces:
Sleeping
Sleeping
Refactored code to run in Hugging Face Spaces
Browse files
app.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from gnews import GNews
|
3 |
+
import pandas as pd
|
4 |
+
from transformers import pipeline
|
5 |
+
from datetime import datetime, timedelta
|
6 |
+
|
7 |
+
|
8 |
+
def discard_old_rows(df, max_age_days=60):
    """Keep only the rows whose 'published date' falls inside the recent window.

    Args:
        df: DataFrame with a 'published date' column of strings matching the
            format '%a, %d %b %Y %H:%M:%S %Z'.
        max_age_days: Maximum age of a row in days. Defaults to 60, i.e.
            roughly two months.

    Returns:
        A new DataFrame containing only the sufficiently recent rows, with
        'published date' converted to timezone-aware UTC timestamps. The
        input frame is not modified.
    """
    # An empty frame (for example one built from an empty result list, which
    # has no columns at all) has nothing to parse or filter.
    if df.empty:
        return df.copy()

    # utc=True yields timezone-aware timestamps, so the comparison against
    # the cutoff below never mixes naive and aware datetimes.
    published = pd.to_datetime(
        df['published date'],
        format='%a, %d %b %Y %H:%M:%S %Z',
        utc=True,
    )
    cutoff = pd.Timestamp.now(tz='UTC') - pd.Timedelta(days=max_age_days)

    # assign() builds a new frame, leaving the caller's DataFrame untouched.
    return df.assign(**{'published date': published})[published >= cutoff]
|
22 |
+
|
23 |
+
|
24 |
+
def extract_and_clean_titles(df):
    """Return the headlines from df['title'] without publisher suffix or ellipses.

    Titles are assumed to look like 'Headline - Publisher' (TODO confirm
    against the news source). Only the text before the LAST ' - ' separator
    is kept, so hyphenated words inside the headline (e.g. 'Year-end')
    survive intact. A title without that separator is used as-is.

    Args:
        df: Mapping or DataFrame whose 'title' entry iterates over strings.

    Returns:
        A list of cleaned headline strings, one per input title, in order.
    """
    cleaned_titles = []
    for title in df['title']:
        # rpartition splits on the last occurrence; when the separator is
        # missing it returns ('', '', title), so fall back to the full title.
        headline, separator, _publisher = title.rpartition(' - ')
        if not separator:
            headline = title

        # Drop truncation markers and any stray surrounding whitespace.
        cleaned_titles.append(headline.replace('...', '').strip())

    return cleaned_titles
|
45 |
+
|
46 |
+
|
47 |
+
def analyze_sentiments(values_list, sentiment_analysis):
    """Apply the sentiment callable to every title and collect the results.

    Args:
        values_list: Iterable of news titles.
        sentiment_analysis: Callable invoked once per title.

    Returns:
        A list holding whatever the callable returned for each title, in the
        same order as the input.
    """
    return [sentiment_analysis(title) for title in values_list]
|
61 |
+
|
62 |
+
|
63 |
+
def calculate_weighted_average(predictions):
    """Compute the signed mean confidence over all predictions.

    The first entry of each prediction contributes +score when its label is
    'positive', -score when it is 'negative', and nothing for any other
    label. The sum is divided by the total number of predictions, so
    other-labelled items still pull the average towards zero.

    Args:
        predictions: List of predictions; each is indexable and its first
            element is a dict with 'label' and 'score' keys.

    Returns:
        The averaged score as a float, or 0.0 when there are no predictions
        (instead of raising ZeroDivisionError).
    """
    if not predictions:
        return 0.0

    # Sign applied to the score per label; unlisted labels count as zero.
    signs = {'positive': 1, 'negative': -1}

    total = 0
    for prediction in predictions:
        top = prediction[0]
        total += signs.get(top['label'], 0) * top['score']

    return total / len(predictions)
|
83 |
+
|
84 |
+
|
85 |
+
def sentiment_pie_chart(predictions):
    """
    Generates a donut-style pie chart of the sentiment label distribution.

    Args:
        predictions: List of predictions; each is indexable and its first
            element is a dict with a 'label' key. Only the labels
            'positive', 'negative' and 'neutral' are counted.

    Returns:
        The matplotlib figure holding the chart. When no label was counted,
        the figure shows a short placeholder text instead of a pie.
    """
    # Imported locally so the function does not depend on a module-level
    # `plt` name being defined elsewhere in the file.
    import matplotlib.pyplot as plt

    top_labels = [item[0]['label'] for item in predictions]

    labels = ['Positive', 'Negative', 'Neutral']
    sizes = [top_labels.count(key) for key in ('positive', 'negative', 'neutral')]
    colors = ['#66BB6A', '#EF5350', '#42A5F5']

    fig, ax = plt.subplots()
    if any(sizes):
        ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90, pctdistance=0.85)
        # A white disc over the centre turns the pie into a donut.
        ax.add_artist(plt.Circle((0, 0), 0.70, fc='white'))
        ax.axis('equal')
    else:
        # All counts are zero: there are no proportions to draw.
        ax.text(0.5, 0.5, 'No sentiment data', ha='center', va='center', transform=ax.transAxes)
        ax.axis('off')
    ax.set_title('Sentiment Analysis Results')
    return fig
|
113 |
+
|
114 |
+
|
115 |
+
# Pipelines already built, keyed by model name, so the model is loaded once
# per process instead of once per request.
_SENTIMENT_PIPELINES = {}


def _get_sentiment_pipeline(model):
    """Return the pipeline for *model*, building it only on first use."""
    if model not in _SENTIMENT_PIPELINES:
        _SENTIMENT_PIPELINES[model] = pipeline(model=model)
    return _SENTIMENT_PIPELINES[model]


def main(stock):
    """Score recent news sentiment for a stock and chart the distribution.

    Fetches news for '<stock> share', drops old articles, cleans the titles,
    runs the sentiment model on each one and summarises the result.

    Args:
        stock: Name or ticker typed by the user.

    Returns:
        A (message, figure) tuple. The message carries the weighted
        sentiment score and the figure is the sentiment pie chart. When the
        input is blank or no recent news is found, the message explains that
        and the figure is None.
    """
    # Specifying model
    model = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

    stock = (stock or '').strip()
    if not stock:
        return 'Please enter a stock name.', None

    # Scraping top data from google news; the space keeps the query as two
    # words ("AAPL share") rather than one fused token ("AAPLshare").
    google_news = GNews()
    company_news = google_news.get_news(f'{stock} share')
    if not company_news:
        return f'No recent news found for "{stock}".', None
    df = pd.DataFrame(company_news)

    # Discarding old rows
    df = discard_old_rows(df)

    # Cleaning the titles for sentiment analysis
    values_list = extract_and_clean_titles(df)
    if not values_list:
        # Every article was older than the cut-off; nothing to score.
        return f'No recent news found for "{stock}".', None

    # Sentiment Analysis
    sentiment_analysis = _get_sentiment_pipeline(model)

    # Predictions
    predictions = analyze_sentiments(values_list, sentiment_analysis)

    # Weighted Average
    weighted_avg = calculate_weighted_average(predictions)

    # Pie-Chart
    pie_chart = sentiment_pie_chart(predictions)

    return f'Weighted Sentiment Score: {weighted_avg:.2f}', pie_chart
|
143 |
+
|
144 |
+
# Gradio UI: one text input for the stock, and two outputs matching the
# (message, figure) tuple returned by main. Components come from the
# top-level gradio namespace; gr.inputs / gr.outputs are the deprecated
# legacy namespaces.
iface = gr.Interface(
    fn=main,
    inputs=gr.Textbox(label="Stock"),
    outputs=[
        gr.Textbox(label="Weighted Sentiment Score"),
        gr.Plot(label="Sentiment Distribution"),
    ],
)

if __name__ == "__main__":
    iface.launch()
|