# Hugging Face Space (status: Running)
import gradio as gr | |
from gnews import GNews | |
import pandas as pd | |
from transformers import pipeline | |
from datetime import datetime, timedelta | |
import matplotlib.pyplot as plt | |
import tensorflow as tf | |
# def discard_old_rows(df): | |
# # Convert the 'published date' column to datetime | |
# df['published date'] = pd.to_datetime(df['published date'], format='%a, %d %b %Y %H:%M:%S %Z') | |
# # Get the current date | |
# current_date = datetime.utcnow() | |
# # Calculate the date two months ago | |
# two_months_ago = current_date - timedelta(days=60) | |
# # Filter the DataFrame to keep only the rows with 'published date' within the last two months | |
# df_filtered = df[df['published date'] >= two_months_ago] | |
# return df_filtered | |
def extract_and_clean_titles(df):
    """Return the headline part of every entry in ``df['title']``.

    The trailing ``" - <publisher>"`` suffix is removed from each title, any
    ``'...'`` sequences are dropped and surrounding whitespace is stripped.

    NOTE(review): assumes titles look like ``"<headline> - <publisher>"``
    (the shape Google News feeds normally use) -- confirm against GNews output.

    Args:
        df: DataFrame holding a ``'title'`` column. An empty DataFrame is
            accepted even when that column is missing.

    Returns:
        A list of cleaned headline strings in row order. Rows whose title is
        not a string (for example NaN) are skipped.
    """
    # An empty DataFrame may not have a 'title' column at all.
    if df.empty:
        return []

    cleaned_titles = []
    for title in df['title']:
        if not isinstance(title, str):
            # Missing titles show up as NaN floats; there is nothing to analyse.
            continue
        # Split on the LAST " - " so hyphenated words ("Coca-Cola") and dashes
        # inside the headline itself are preserved.
        headline, separator, _publisher = title.rpartition(' - ')
        if not separator:
            # No publisher suffix: keep the whole title.
            headline = title
        cleaned_titles.append(headline.replace('...', '').strip())
    return cleaned_titles
def analyze_sentiments(values_list, sentiment_analysis):
    """Apply ``sentiment_analysis`` to each title and collect the results.

    Args:
        values_list: Iterable of news titles.
        sentiment_analysis: Callable invoked once per title with that title
            as its only argument.

    Returns:
        A list with one entry per title, in input order, holding whatever
        ``sentiment_analysis`` returned for that title.
    """
    return [sentiment_analysis(title) for title in values_list]
def calculate_weighted_average(predictions):
    """Return the mean signed confidence of the given sentiment predictions.

    Only the first element of each prediction is used. Its ``'score'`` counts
    positively when ``'label'`` is ``'positive'``, negatively when it is
    ``'negative'``, and as zero for any other label. The total is divided by
    the number of predictions, so other labels still dilute the average.

    Args:
        predictions: Sequence of predictions, each a sequence whose first item
            is a mapping with ``'label'`` and ``'score'`` keys.

    Returns:
        The averaged score as a float, or ``0.0`` when ``predictions`` is
        empty (instead of raising ``ZeroDivisionError``).
    """
    if not predictions:
        return 0.0

    # Direction each label pushes the average; labels not listed contribute 0.
    sign_by_label = {'positive': 1, 'negative': -1}
    signed_total = sum(
        sign_by_label.get(prediction[0]['label'], 0) * prediction[0]['score']
        for prediction in predictions
    )
    return signed_total / len(predictions)
def sentiment_pie_chart(predictions, stock ,output_path='sentiment_pie_chart.png'):
    """Draw the sentiment distribution as a ring-shaped pie chart and save it.

    Args:
        predictions: Sequence of predictions; the ``'label'`` of the first
            item of each one is counted. Labels other than ``'positive'``,
            ``'negative'`` and ``'neutral'`` are ignored.
        stock: Name inserted into the chart title.
        output_path: File the figure is written to.

    Returns:
        ``output_path``, unchanged.
    """
    # Tally how often each of the three known labels occurs.
    tally = {'positive': 0, 'negative': 0, 'neutral': 0}
    for prediction in predictions:
        top_label = prediction[0]['label']
        if top_label in tally:
            tally[top_label] += 1

    slice_sizes = [tally['positive'], tally['negative'], tally['neutral']]
    slice_names = ['Positive', 'Negative', 'Neutral']
    slice_colors = ['#66BB6A', '#EF5350', '#42A5F5']

    fig, ax = plt.subplots()
    ax.pie(
        slice_sizes,
        labels=slice_names,
        colors=slice_colors,
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.85,
    )
    # A white disc over the centre turns the pie into a ring.
    ax.add_artist(plt.Circle((0, 0), 0.70, fc='white'))
    ax.axis('equal')
    ax.set_title('Sentiment Analysis Results for ' + stock + ' Stock')

    # Write the image to disk, then release the figure.
    fig.savefig(output_path)
    plt.close(fig)
    return output_path
# Model used to classify the sentiment of each news headline.
_SENTIMENT_MODEL = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

# Pipeline instance shared by all requests; created on first use so the model
# is not reloaded for every call to main().
_sentiment_pipeline = None


def _get_sentiment_pipeline():
    """Return the shared sentiment pipeline, creating it on first use."""
    global _sentiment_pipeline
    if _sentiment_pipeline is None:
        _sentiment_pipeline = pipeline(model=_SENTIMENT_MODEL)
    return _sentiment_pipeline


def main(stock):
    """Score recent news sentiment for ``stock`` and chart its distribution.

    Fetches up to 50 Google News results from the last 30 days for
    ``"<stock> stock"``, classifies each cleaned headline, averages the signed
    scores and renders a pie chart of the label counts.

    Args:
        stock: Company name or ticker typed by the user.

    Returns:
        A ``(text, image_path)`` tuple. ``text`` is the average formatted to
        two decimals followed by ``(Positive)``, ``(Negative)`` or
        ``(Stagnant)``; ``image_path`` is the saved chart. When the input is
        blank or no news is found, ``text`` is a message and ``image_path``
        is ``None``.
    """
    stock = (stock or "").strip()
    if not stock:
        return "Please enter a stock name or ticker", None

    # Scrape the top results from Google News. The space before "stock" keeps
    # the query as two words ("AAPL stock") instead of "AAPLstock".
    google_news = GNews(max_results=50, period='30d')
    company_news = google_news.get_news(stock + " stock")
    df = pd.DataFrame(company_news)
    print(df)

    # Discarding old rows
    # df=discard_old_rows(df)
    if df.empty:
        return "Not enough data, please increase timeframe", None

    # Clean the titles for sentiment analysis.
    values_list = extract_and_clean_titles(df)
    if not values_list:
        # Nothing usable to classify; avoids averaging over zero predictions.
        return "Not enough data, please increase timeframe", None

    predictions = analyze_sentiments(values_list, _get_sentiment_pipeline())
    weighted_avg = calculate_weighted_average(predictions)
    pie_chart_path = sentiment_pie_chart(predictions, stock)

    # Averages within +/-0.10 of zero are treated as no clear direction.
    if weighted_avg > 0.10:
        verdict = 'Positive'
    elif weighted_avg < -0.10:
        verdict = 'Negative'
    else:
        verdict = 'Stagnant'
    return f'{weighted_avg:.2f} ({verdict})', pie_chart_path
# Gradio UI: one text input passed to main(); its two return values are shown
# in a textbox and an image component respectively.
iface = gr.Interface(fn=main, inputs=["textbox"], outputs=["textbox", "image"])

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()