# Page header captured with this file -- Spaces status: Sleeping; file size: 5,320 Bytes.
# (The per-line commit-hash column [d00b448, 4f4ee2f, 70e3016, af3031c] and the
# 1-169 line-number gutter from the file viewer were extraction residue, not code.)
import gradio as gr
from gnews import GNews
import pandas as pd
from transformers import pipeline
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import tensorflow as tf
# def discard_old_rows(df):
# # Convert the 'published date' column to datetime
# df['published date'] = pd.to_datetime(df['published date'], format='%a, %d %b %Y %H:%M:%S %Z')
# # Get the current date
# current_date = datetime.utcnow()
# # Calculate the date two months ago
# two_months_ago = current_date - timedelta(days=60)
# # Filter the DataFrame to keep only the rows with 'published date' within the last two months
# df_filtered = df[df['published date'] >= two_months_ago]
# return df_filtered
def extract_and_clean_titles(df):
    """Return the cleaned headline text for every row of ``df['title']``.

    Each title is cut at the LAST ``" - "`` separator so that a trailing
    ``" - <publisher>"`` suffix is dropped while hyphens inside the headline
    itself (e.g. ``"Coca-Cola"``) are preserved. Titles without that separator
    are kept whole. Any ``'...'`` is then removed and surrounding whitespace
    trimmed.

    NOTE(review): assumes titles follow the "headline - publisher" layout;
    confirm against the news source's actual output.

    Args:
        df: DataFrame expected to carry a ``'title'`` column.

    Returns:
        A list of cleaned headline strings; empty when ``df`` is empty.
        Non-string cells (e.g. missing values) and titles that end up empty
        after cleaning are skipped.
    """
    if df.empty:
        return []

    cleaned_titles = []
    for title in df['title']:
        # Missing titles surface as None/NaN; there is nothing to analyse.
        if not isinstance(title, str):
            continue
        headline, separator, _suffix = title.rpartition(' - ')
        if not separator:
            # rpartition puts the whole string in the last slot when the
            # separator is absent, so fall back to the full title.
            headline = title
        headline = headline.replace('...', '').strip()
        if headline:
            cleaned_titles.append(headline)
    return cleaned_titles
def analyze_sentiments(values_list, sentiment_analysis):
    """Apply ``sentiment_analysis`` to each title and collect the results.

    Args:
        values_list: Iterable of news titles.
        sentiment_analysis: Callable invoked once per title.

    Returns:
        A list holding the callable's result for each title, in input order.
    """
    return [sentiment_analysis(headline) for headline in values_list]
def calculate_weighted_average(predictions):
    """Return the mean signed sentiment score over all predictions.

    Only the first entry of each prediction is read. A ``'positive'`` label
    contributes ``+score``, a ``'negative'`` label contributes ``-score``, and
    any other label contributes nothing. The sum is divided by the total
    number of predictions, so other labels still pull the result towards zero.

    Args:
        predictions: Sequence of predictions, each a sequence whose first item
            is a mapping with ``'label'`` and ``'score'`` keys.

    Returns:
        The averaged signed score, or ``0.0`` when ``predictions`` is empty
        (instead of raising ``ZeroDivisionError``).
    """
    if not predictions:
        return 0.0

    sign_by_label = {'positive': 1, 'negative': -1}
    signed_total = 0
    for prediction in predictions:
        top = prediction[0]
        sign = sign_by_label.get(top['label'])
        # Labels other than positive/negative add nothing to the sum.
        if sign is not None:
            signed_total += sign * top['score']
    return signed_total / len(predictions)
def sentiment_pie_chart(predictions, stock, output_path='sentiment_pie_chart.png'):
    """Draw a donut-style pie chart of sentiment label counts and save it.

    The first entry of each prediction supplies the label; only
    ``'positive'``, ``'negative'`` and ``'neutral'`` are counted.

    Args:
        predictions: Sequence of predictions, each a sequence whose first item
            is a mapping with a ``'label'`` key.
        stock: Name concatenated into the chart title.
        output_path: File path the figure is saved to.

    Returns:
        ``output_path``, after the figure has been written and closed.
    """
    tally = {'positive': 0, 'negative': 0, 'neutral': 0}
    for entry in predictions:
        top_label = entry[0]['label']
        if top_label in tally:
            tally[top_label] += 1

    wedge_sizes = [tally['positive'], tally['negative'], tally['neutral']]
    wedge_labels = ['Positive', 'Negative', 'Neutral']
    wedge_colors = ['#66BB6A', '#EF5350', '#42A5F5']

    fig, ax = plt.subplots()
    ax.pie(
        wedge_sizes,
        labels=wedge_labels,
        colors=wedge_colors,
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.85,
    )
    # A white disc over the centre turns the pie into a ring.
    fig.gca().add_artist(plt.Circle((0, 0), 0.70, fc='white'))
    ax.axis('equal')
    plt.title('Sentiment Analysis Results for ' + stock + ' Stock')

    # Write the image, then close the figure so it is not kept open.
    plt.savefig(output_path)
    plt.close(fig)
    return output_path
# Pipelines already built in this process, keyed by model name, so the model
# is loaded once instead of on every request.
_SENTIMENT_PIPELINES = {}


def _get_sentiment_pipeline(model):
    """Return the pipeline for ``model``, building and caching it on first use."""
    if model not in _SENTIMENT_PIPELINES:
        _SENTIMENT_PIPELINES[model] = pipeline(model=model)
    return _SENTIMENT_PIPELINES[model]


def main(stock):
    """Score recent news sentiment for ``stock`` and chart the label mix.

    Fetches up to 50 news items from the last 45 days for the query
    ``"<stock> stock"``, cleans the titles, runs the sentiment model on each,
    averages the signed scores and renders a pie chart of the labels.

    Args:
        stock: Stock or company name typed by the user.

    Returns:
        A ``(summary, chart_path)`` tuple. ``summary`` is the average formatted
        to two decimals followed by ``(Stagnant)`` for values in
        ``[-0.10, 0.10]``, ``(Positive)`` above that range and ``(Negative)``
        otherwise. ``chart_path`` is the saved chart image, or ``None`` when
        the input is blank or no usable news was found.
    """
    model = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

    stock = (stock or "").strip()
    if not stock:
        return "Please enter a stock name", None

    # The space matters: without it the query becomes e.g. "AAPLstock".
    google_news = GNews(max_results=50, period='45d')
    company_news = google_news.get_news(f"{stock} stock")
    df = pd.DataFrame(company_news)
    if df.empty:
        return "Not enough data, please increase timeframe", None

    values_list = extract_and_clean_titles(df)
    if not values_list:
        # Nothing left to score; also avoids averaging over zero predictions.
        return "Not enough data, please increase timeframe", None

    sentiment_analysis = _get_sentiment_pipeline(model)
    predictions = analyze_sentiments(values_list, sentiment_analysis)
    weighted_avg = calculate_weighted_average(predictions)
    pie_chart_path = sentiment_pie_chart(predictions, stock)

    if -0.10 <= weighted_avg <= 0.10:
        verdict = 'Stagnant'
    elif weighted_avg > 0.10:
        verdict = 'Positive'
    else:
        verdict = 'Negative'
    return f'{weighted_avg:.2f} ({verdict})', pie_chart_path
# Gradio UI: a single text input is passed to main(), whose two return values
# fill the text output and the image output respectively.
iface = gr.Interface(
    fn=main,
    inputs=["textbox"],
    outputs=["textbox", "image"],
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()