File size: 5,320 Bytes
d00b448
 
 
 
 
4f4ee2f
 
d00b448
 
4f4ee2f
 
 
d00b448
4f4ee2f
 
d00b448
4f4ee2f
 
d00b448
4f4ee2f
 
d00b448
4f4ee2f
d00b448
 
 
 
 
4f4ee2f
 
d00b448
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f4ee2f
d00b448
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f4ee2f
 
 
 
 
 
 
d00b448
 
 
 
 
 
 
70e3016
4f4ee2f
d00b448
4f4ee2f
d00b448
4f4ee2f
 
 
d00b448
 
 
4f4ee2f
d00b448
 
 
 
 
 
 
 
 
 
4f4ee2f
 
 
 
 
 
 
 
 
 
d00b448
 
 
 
4f4ee2f
af3031c
d00b448
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import gradio as gr
from gnews import GNews
import pandas as pd
from transformers import pipeline
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import tensorflow as tf


# def discard_old_rows(df):
#     # Convert the 'published date' column to datetime
#     df['published date'] = pd.to_datetime(df['published date'], format='%a, %d %b %Y %H:%M:%S %Z')
    
#     # Get the current date
#     current_date = datetime.utcnow()
    
#     # Calculate the date two months ago
#     two_months_ago = current_date - timedelta(days=60)
    
#     # Filter the DataFrame to keep only the rows with 'published date' within the last two months
#     df_filtered = df[df['published date'] >= two_months_ago]
    
#     return df_filtered


def extract_and_clean_titles(df):
    """Return the headline part of every entry in ``df['title']``.

    Titles are presumably of the form ``"<headline> - <publisher>"`` (the
    shape news feeds usually deliver -- confirm against the data source).
    Only the trailing ``" - <publisher>"`` suffix is removed, so hyphens
    inside the headline itself (e.g. "Coca-Cola") are preserved.

    Args:
        df: DataFrame with a string ``'title'`` column. May be empty.

    Returns:
        A list of cleaned title strings, one per row; an empty list when
        ``df`` is empty.
    """
    if df.empty:
        return []

    cleaned_titles = []
    for title in df['title']:
        # Split on the LAST " - " so that only the trailing suffix is cut;
        # cutting at the first bare '-' would truncate hyphenated words.
        headline, separator, _suffix = title.rpartition(' - ')
        if not separator:
            # No separator present: keep the whole title.
            headline = title

        # Drop truncation markers and any whitespace left behind.
        cleaned_titles.append(headline.replace('...', '').strip())

    return cleaned_titles


def analyze_sentiments(values_list, sentiment_analysis):
    """Apply ``sentiment_analysis`` to each title and collect the results.

    Args:
        values_list: Iterable of news titles.
        sentiment_analysis: Callable invoked once per title, in order.

    Returns:
        A list holding the callable's result for every title, in the same
        order as ``values_list``.
    """
    return [sentiment_analysis(title) for title in values_list]


def calculate_weighted_average(predictions):
    """Compute the signed mean confidence over all predictions.

    Each prediction is a sequence whose first element is a dict with
    ``'label'`` and ``'score'`` keys. A ``'positive'`` label contributes
    ``+score``, a ``'negative'`` label contributes ``-score``, and any other
    label contributes nothing to the sum but still counts in the divisor.

    Args:
        predictions: List of predictions as described above. May be empty.

    Returns:
        The sum of signed scores divided by the number of predictions, or
        ``0.0`` when ``predictions`` is empty (instead of raising
        ``ZeroDivisionError``).
    """
    if not predictions:
        return 0.0

    label_sign = {'positive': 1, 'negative': -1}
    signed_total = 0
    for prediction in predictions:
        top = prediction[0]
        sign = label_sign.get(top['label'])
        # Labels other than positive/negative leave the total unchanged.
        if sign is not None:
            signed_total += sign * top['score']

    return signed_total / len(predictions)


def sentiment_pie_chart(predictions, stock, output_path='sentiment_pie_chart.png'):
    """
    Draw a donut-style pie chart of the sentiment label distribution.

    Counts how many predictions carry the 'positive', 'negative' and
    'neutral' label (read from the first element of each prediction),
    renders the chart, saves it to ``output_path`` and returns that path.
    Predictions with any other label are not counted.
    """
    observed_labels = [entry[0]['label'] for entry in predictions]

    # Slice order must match the display labels and colours below.
    sizes = [
        observed_labels.count('positive'),
        observed_labels.count('negative'),
        observed_labels.count('neutral'),
    ]
    labels = ['Positive', 'Negative', 'Neutral']
    colors = ['#66BB6A', '#EF5350', '#42A5F5']

    fig, ax = plt.subplots()
    ax.pie(
        sizes,
        labels=labels,
        colors=colors,
        autopct='%1.1f%%',
        startangle=90,
        pctdistance=0.85,
    )

    # A white disc over the centre turns the pie into a donut.
    donut_hole = plt.Circle((0, 0), 0.70, fc='white')
    ax.add_artist(donut_hole)
    ax.axis('equal')
    plt.title('Sentiment Analysis Results for ' + stock + ' Stock')

    # Write the figure to disk, then release it.
    plt.savefig(output_path)
    plt.close(fig)
    return output_path



def main(stock):
    """Run the news-sentiment workflow for ``stock``.

    Fetches recent news for the query ``"<stock> stock"``, cleans the
    titles, classifies each one with the sentiment model, and summarises
    the result.

    Args:
        stock: Company name or ticker typed by the user.

    Returns:
        A ``(text, image_path)`` tuple. ``text`` is the averaged score with
        a (Stagnant)/(Positive)/(Negative) verdict, or a message when no
        analysis could be run; ``image_path`` is the saved pie chart path,
        or ``None`` when no analysis could be run.
    """
    # Reject blank input up front; a bare "stock" query is meaningless.
    stock = (stock or "").strip()
    if not stock:
        return "Please enter a stock name or ticker", None

    # Specifying model
    model = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

    # Scraping top data from google news. The space before "stock" matters:
    # without it the query becomes e.g. "AAPLstock".
    google_news = GNews(max_results=50, period='45d')
    company_news = google_news.get_news(f"{stock} stock")
    df = pd.DataFrame(company_news)
    print(df)
    if df.empty:
        return "Not enough data, please increase timeframe", None

    # Cleaning the titles for sentiment analysis
    values_list = extract_and_clean_titles(df)

    # Sentiment Analysis
    # NOTE: the pipeline is constructed on every call.
    sentiment_analysis = pipeline(model=model)

    # Predictions
    predictions = analyze_sentiments(values_list, sentiment_analysis)

    # Weighted Average
    weighted_avg = calculate_weighted_average(predictions)

    # Pie-Chart
    pie_chart_path = sentiment_pie_chart(predictions, stock)

    # Scores within +/-0.10 of zero are treated as no clear direction.
    if -0.10 <= weighted_avg <= 0.10:
        return f'{weighted_avg:.2f} (Stagnant)', pie_chart_path
    elif weighted_avg > 0.1:
        return f'{weighted_avg:.2f} (Positive)', pie_chart_path
    else:
        return f'{weighted_avg:.2f} (Negative)', pie_chart_path




# Gradio UI: one text input passed to `main`, whose two return values are
# shown as a text box and an image.
iface = gr.Interface(fn=main, inputs=["textbox"], outputs=["textbox", "image"])

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()