import datetime

import streamlit as st
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
from dateutil import parser

# NLTK VADER for sentiment analysis
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

st.set_page_config(page_title="Stock News Sentiment Analyzer", layout="wide")
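
# Overview: the app below scrapes recent news headlines for a user-supplied
# ticker from FinViz, scores each headline with NLTK's VADER sentiment
# analyzer, and plots hourly and daily average sentiment with Plotly.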


def get_news(ticker):
    url = finviz_url + ticker
    # Use a browser-like User-Agent so FinViz does not reject the request
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    response = urlopen(req)
    # Parse the response HTML
    html = BeautifulSoup(response, features='html.parser')
    # Find the 'news-table' element and return it
    news_table = html.find(id='news-table')
    return news_table


# Parse the news table into a DataFrame
def parse_news(news_table):
    parsed_news = []
    
    for x in news_table.find_all('tr'):
        try:
            # Get the headline text
            text = x.a.get_text()
            # Get the date and time from the first <td> tag
            date_scrape = x.td.text.strip().split()
            
            # Handle cases where only time is present
            if len(date_scrape) == 1:
                date = datetime.datetime.today().strftime('%Y-%m-%d')
                time = date_scrape[0]
            else:
                date = date_scrape[0]
                time = date_scrape[1]

            # Parse the date and time using dateutil.parser
            datetime_str = f"{date} {time}"
            datetime_parsed = parser.parse(datetime_str)

            # Append the parsed news to the list
            parsed_news.append([datetime_parsed, text])
            
        except Exception as e:
            print("Error parsing news:", e)
            continue
    
    # Convert the list to a DataFrame
    columns = ['datetime', 'headline']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
    
    return parsed_news_df


def score_news(parsed_news_df):
    # Instantiate the sentiment intensity analyzer
    vader = SentimentIntensityAnalyzer()
    
    # Iterate through the headlines and get the polarity scores using vader
    scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()

    # Convert the 'scores' list of dicts into a DataFrame
    scores_df = pd.DataFrame(scores)

    # Join the DataFrames of the news and the list of dicts
    parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')        
    parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')    
    parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})

    return parsed_and_scored_news
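
# Note: the DataFrame returned by score_news is indexed by the headline
# datetime and keeps the 'headline' text alongside VADER's 'neg', 'neu' and
# 'pos' scores, with the 'compound' score renamed to 'sentiment_score'.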



def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    # Ensure that only numeric columns are resampled
    numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    
    # Resample on the datetime index into hourly bins and take the mean score
    mean_scores = numeric_cols.resample('h').mean()

    # Plot a bar chart with Plotly 
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Hourly Sentiment Scores')
    return fig  # Return the figure to display in the Streamlit app

def plot_daily_sentiment(parsed_and_scored_news, ticker):
    # Ensure that only numeric columns are resampled
    numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])

    # Resample on the datetime index into daily bins and take the mean score
    mean_scores = numeric_cols.resample('D').mean()

    # Plot a bar chart with Plotly
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Daily Sentiment Scores')
    return fig  # Return the figure to display in the Streamlit app
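
# Both plotting helpers rely on the 'datetime' index set in score_news:
# DataFrame.resample needs a DatetimeIndex to bin the scores by hour or day.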


# for extracting data from finviz
finviz_url = 'https://finviz.com/quote.ashx?t='
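# Example: for ticker 'AAPL', get_news requests https://finviz.com/quote.ashx?t=AAPL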


st.header("Stock News Sentiment Analyzer")

ticker = st.text_input('Enter Stock Ticker', '').upper()


try:
    st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
    news_table = get_news(ticker)
    parsed_news_df = parse_news(news_table)
    parsed_and_scored_news = score_news(parsed_news_df)
    fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
    fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

    st.plotly_chart(fig_hourly)
    st.plotly_chart(fig_daily)

    description = """
        The above charts average the sentiment scores of {} stock hourly and daily.
        The table below lists the most recent headlines for the stock together with their negative, neutral, positive and aggregated sentiment scores.
        The news headlines are obtained from the FinViz website.
        Sentiments are computed with the nltk.sentiment.vader Python library.
        """.format(ticker)

    st.write(description)
    st.table(parsed_and_scored_news)

except Exception as e:
    print(str(e))
    st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")

hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
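
# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py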