Devis2awe committed on
Commit
d00b448
1 Parent(s): 50be842

refactored code to run in huggingface spaces

Browse files
Files changed (1) hide show
  1. app.py +151 -0
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from gnews import GNews
3
+ import pandas as pd
4
+ from transformers import pipeline
5
+ from datetime import datetime, timedelta
6
+
7
+
8
def discard_old_rows(df, max_age_days=60):
    """Return the rows of *df* published within the last *max_age_days* days.

    Args:
        df: DataFrame with a 'published date' column of strings matching
            '%a, %d %b %Y %H:%M:%S %Z', e.g. 'Mon, 15 May 2023 07:00:00 GMT'.
        max_age_days: Maximum article age in days. Defaults to 60, the
            window this function has always used.

    Returns:
        A new DataFrame containing only the sufficiently recent rows, with
        'published date' converted to timezone-aware UTC datetimes. The
        frame passed in is not modified.
    """
    # A frame built from an empty result list has no columns at all, so
    # there is nothing to parse or filter.
    if 'published date' not in df.columns:
        return df.copy()

    recent = df.copy()

    # utc=True pins every parsed value to UTC. The '%Z' directive makes the
    # parsed values timezone-aware, so the cutoff must be aware as well:
    # comparing aware values with a naive datetime raises TypeError.
    recent['published date'] = pd.to_datetime(
        recent['published date'],
        format='%a, %d %b %Y %H:%M:%S %Z',
        utc=True,
    )
    cutoff = pd.Timestamp.now(tz='UTC') - pd.Timedelta(days=max_age_days)

    return recent[recent['published date'] >= cutoff]
22
+
23
+
24
def extract_and_clean_titles(df):
    """Return the headline portion of every entry in ``df['title']``.

    Titles are expected to look like 'Headline - Publisher' (assumption
    based on the original hyphen-splitting logic; confirm against the news
    source). The text after the LAST ' - ' separator is dropped, so hyphens
    inside the headline itself (e.g. 'Year-end rally') are preserved.

    Args:
        df: Mapping or DataFrame whose 'title' entry is an iterable of
            strings.

    Returns:
        A list of cleaned headline strings, in the original order.
    """
    cleaned_titles = []

    for title in df['title']:
        # rpartition returns an empty separator when ' - ' is absent; in
        # that case the whole title is the headline.
        headline, separator, _publisher = title.rpartition(' - ')
        if not separator:
            headline = title

        # Drop truncation markers and stray surrounding whitespace.
        cleaned_titles.append(headline.replace('...', '').strip())

    return cleaned_titles
45
+
46
+
47
def analyze_sentiments(values_list, sentiment_analysis):
    """Classify each headline and collect the raw results in order.

    Args:
        values_list: Iterable of headline strings.
        sentiment_analysis: Callable invoked once per headline; whatever it
            returns is stored unchanged.

    Returns:
        A list with one classifier result per headline.
    """
    return [sentiment_analysis(headline) for headline in values_list]
61
+
62
+
63
def calculate_weighted_average(predictions):
    """Return the mean signed confidence over all predictions.

    Each prediction contributes +score when its first entry is labelled
    'positive', -score when labelled 'negative', and nothing for any other
    label. The sum is divided by the total number of predictions, so
    other-labelled items still pull the result toward zero.

    Args:
        predictions: Sequence of classifier results; each result is a
            sequence whose first element is a dict with 'label' and
            'score' keys.

    Returns:
        The mean signed score, or 0.0 when *predictions* is empty (instead
        of raising ZeroDivisionError).
    """
    if not predictions:
        return 0.0

    signs = {'positive': 1, 'negative': -1}

    signed_total = sum(
        signs[result[0]['label']] * result[0]['score']
        for result in predictions
        if result[0]['label'] in signs
    )

    return signed_total / len(predictions)
83
+
84
+
85
def sentiment_pie_chart(predictions):
    """Build a donut chart of the positive/negative/neutral label counts.

    Args:
        predictions: Sequence of classifier results; each result is a
            sequence whose first element is a dict with a 'label' key.
            Labels other than 'positive', 'negative' and 'neutral' are
            ignored.

    Returns:
        The matplotlib Figure holding the chart. When no recognised labels
        are present the figure shows a placeholder message instead of a
        pie, since percentages of a zero total are undefined.
    """
    # Imported here because the module's import block never brings
    # matplotlib into scope, which left `plt` undefined at call time.
    import matplotlib.pyplot as plt

    observed = [item[0]['label'] for item in predictions]

    labels = ['Positive', 'Negative', 'Neutral']
    sizes = [observed.count(name.lower()) for name in labels]
    colors = ['#66BB6A', '#EF5350', '#42A5F5']

    fig, ax = plt.subplots()

    if sum(sizes) == 0:
        ax.text(0.5, 0.5, 'No sentiment data', ha='center', va='center',
                transform=ax.transAxes)
        ax.axis('off')
    else:
        ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%',
               startangle=90, pctdistance=0.85)
        # A white disc over the centre turns the pie into a donut.
        ax.add_artist(plt.Circle((0, 0), 0.70, fc='white'))
        # Equal aspect ratio keeps the pie circular.
        ax.axis('equal')

    ax.set_title('Sentiment Analysis Results')
    return fig
113
+
114
+
115
# Pipelines already built, keyed by model name, so the model is loaded once
# per process instead of once per request.
_PIPELINE_CACHE = {}


def _get_sentiment_pipeline(model):
    """Return the transformers pipeline for *model*, building it on first use."""
    if model not in _PIPELINE_CACHE:
        _PIPELINE_CACHE[model] = pipeline(model=model)
    return _PIPELINE_CACHE[model]


def main(stock):
    """Score recent news sentiment for *stock* and chart its distribution.

    Fetches news for the query '<stock> share', keeps recent articles,
    classifies each cleaned headline, and summarises the results.

    Args:
        stock: Company or ticker name typed by the user.

    Returns:
        A ``(text, figure)`` tuple: the formatted weighted sentiment score
        and the sentiment pie chart. When the input is blank or no usable
        news is found, the text explains why and the figure is None.
    """
    # Specifying model
    model = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

    query = (stock or "").strip()
    if not query:
        return 'Please enter a stock name.', None

    # Scraping top data from Google News. The space before "share" matters:
    # without it the query becomes a single fused word such as "AAPLshare".
    google_news = GNews()
    company_news = google_news.get_news(f"{query} share")
    if not company_news:
        return f'No news found for {query}.', None

    # Discarding old rows
    df = discard_old_rows(pd.DataFrame(company_news))

    # Cleaning the titles for sentiment analysis
    values_list = extract_and_clean_titles(df)
    if not values_list:
        # Everything returned was too old; averaging zero items is undefined.
        return f'No recent news found for {query}.', None

    # Sentiment analysis (model loaded once and reused across requests)
    sentiment_analysis = _get_sentiment_pipeline(model)

    # Predictions
    predictions = analyze_sentiments(values_list, sentiment_analysis)

    # Weighted average
    weighted_avg = calculate_weighted_average(predictions)

    # Pie chart
    pie_chart = sentiment_pie_chart(predictions)

    return f'Weighted Sentiment Score: {weighted_avg:.2f}', pie_chart
143
+
144
# Components are taken from the top-level gradio namespace: the gr.inputs /
# gr.outputs namespaces were deprecated in Gradio 3.x and removed in 4.x.
iface = gr.Interface(
    fn=main,
    inputs=gr.Textbox(label="Stock"),
    outputs=[
        gr.Textbox(label="Weighted Sentiment Score"),
        gr.Plot(label="Sentiment Distribution"),
    ],
)

# Launch only when executed as a script, so importing this module does not
# start a server.
if __name__ == "__main__":
    iface.launch()