import streamlit as st
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import plotly
import plotly.express as px
import json # for graph plotting in website
# NLTK VADER for sentiment analysis
from dateutil import parser
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import subprocess
import os
import datetime
# Set the page title and select the wide layout for the app.
st.set_page_config(
    page_title="Stock News Sentiment Analyzer",
    layout="wide",
)
def get_news(ticker):
    """Download the FinViz quote page for *ticker* and return its news table.

    Args:
        ticker: Stock symbol; it is appended to the module-level
            ``finviz_url`` to build the request URL.

    Returns:
        The element of the parsed page whose ``id`` is ``news-table``, or
        ``None`` when the page contains no such element.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for an
            HTTP error status).
    """
    url = finviz_url + ticker
    # The request is sent with a browser-like User-Agent header.
    req = Request(
        url=url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'},
    )
    # Close the HTTP response deterministically and bound the wait so a
    # stalled connection cannot hang the app indefinitely.
    with urlopen(req, timeout=10) as response:
        # Name the parser explicitly so the parse result does not depend on
        # which optional parsers happen to be installed.
        html = BeautifulSoup(response, 'html.parser')
    # Find 'news-table' in the Soup and hand it back to the caller.
    return html.find(id='news-table')
# parse news into dataframe
def parse_news(news_table):
    """Turn the scraped news table into a DataFrame of timestamped headlines.

    For every ``<tr>`` of *news_table* the headline is read from the row's
    first ``<a>`` and the timestamp from the row's first ``<td>``.

    A timestamp cell holds either ``"<date> <time>"`` or only ``"<time>"``.
    A time-only row inherits the date of the closest preceding row that
    carried one; before any dated row has been seen, today's date is used.
    The literal date ``Today`` is replaced by today's date.
    NOTE(review): this assumes FinViz prints the date only on the first
    headline of each day and labels the current day "Today" - confirm
    against the live page.

    Args:
        news_table: Parsed table element providing ``find_all('tr')``.

    Returns:
        DataFrame with the columns ``datetime`` and ``headline``, one row
        per successfully parsed table row (empty when nothing parsed).
    """
    parsed_news = []
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    # Date applied to time-only rows; starts as today so a leading
    # time-only row is still dated.
    current_date = today
    for row in news_table.find_all('tr'):
        link, cell = row.a, row.td
        if link is None or cell is None:
            # No headline link or no timestamp cell: nothing to record.
            continue
        # Get the date and time from the first <td> tag
        date_scrape = cell.text.strip().split()
        if not date_scrape:
            continue
        if len(date_scrape) == 1:
            # Only a time is present: keep the date seen most recently.
            clock = date_scrape[0]
        else:
            if date_scrape[0].lower() == 'today':
                current_date = today
            else:
                current_date = date_scrape[0]
            clock = date_scrape[1]
        # Parse the date and time using dateutil.parser
        try:
            datetime_parsed = parser.parse(f"{current_date} {clock}")
        except (ValueError, OverflowError) as e:
            # Skip rows whose timestamp cannot be understood instead of
            # failing the whole table.
            print("Error parsing news:", e)
            continue
        parsed_news.append([datetime_parsed, link.get_text()])
    # Convert the list to a DataFrame
    columns = ['datetime', 'headline']
    return pd.DataFrame(parsed_news, columns=columns)
def score_news(parsed_news_df):
    """Attach VADER polarity scores to every headline.

    Args:
        parsed_news_df: DataFrame with at least the columns ``datetime``
            and ``headline``.

    Returns:
        A DataFrame indexed by ``datetime`` that holds the headline next to
        its polarity scores, with the ``compound`` score column renamed to
        ``sentiment_score``.
    """
    analyzer = SentimentIntensityAnalyzer()
    # One dict of polarity scores per headline, in row order.
    polarity = [
        analyzer.polarity_scores(headline)
        for headline in parsed_news_df['headline']
    ]
    scores_df = pd.DataFrame(polarity)
    # Join on the row index, switch the index to the timestamp, then give
    # the compound score its display name.
    return (
        parsed_news_df
        .join(scores_df, rsuffix='_right')
        .set_index('datetime')
        .rename(columns={"compound": "sentiment_score"})
    )
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per hour.

    Args:
        parsed_and_scored_news: Datetime-indexed DataFrame containing a
            ``sentiment_score`` column.
        ticker: Symbol used as the prefix of the chart title.

    Returns:
        The Plotly figure, ready to be shown in the Streamlit app.
    """
    # Only the float64/int64 columns can be averaged, so drop the rest
    # before bucketing the rows by hour.
    hourly_means = (
        parsed_and_scored_news
        .select_dtypes(include=['float64', 'int64'])
        .resample('h')
        .mean()
    )
    chart_title = ticker + ' Hourly Sentiment Scores'
    return px.bar(
        hourly_means,
        x=hourly_means.index,
        y='sentiment_score',
        title=chart_title,
    )
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per day.

    Args:
        parsed_and_scored_news: Datetime-indexed DataFrame containing a
            ``sentiment_score`` column.
        ticker: Symbol used as the prefix of the chart title.

    Returns:
        The Plotly figure, ready to be shown in the Streamlit app.
    """
    # Keep the float64/int64 columns only; other columns cannot be averaged.
    numeric_only = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    # Bucket the rows by calendar day and average each bucket.
    daily_means = numeric_only.resample('D').mean()
    chart_title = ticker + ' Daily Sentiment Scores'
    figure = px.bar(daily_means, x=daily_means.index, y='sentiment_score', title=chart_title)
    return figure
# for extracting data from finviz
finviz_url = 'https://finviz.com/quote.ashx?t='

st.header("Stock News Sentiment Analyzer")

# Strip surrounding whitespace so a padded entry such as " aapl " still
# yields a usable symbol for the request URL.
ticker = st.text_input('Enter Stock Ticker', '').strip().upper()
# NOTE(review): `df` is not read anywhere in the visible code - confirm
# nothing else relies on it before removing.
df = pd.DataFrame({'datetime': datetime.datetime.now(), 'ticker': ticker}, index = [0])

ticker_prompt = "Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter."

if not ticker:
    # Nothing entered yet: show the prompt without issuing a request.
    st.write(ticker_prompt)
else:
    try:
        st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
        # Fetch -> parse -> score -> plot; any failure along the way falls
        # through to the prompt below.
        news_table = get_news(ticker)
        parsed_news_df = parse_news(news_table)
        print(parsed_news_df)
        parsed_and_scored_news = score_news(parsed_news_df)
        fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
        fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)
        st.plotly_chart(fig_hourly)
        st.plotly_chart(fig_daily)
        # The text lines stay at column 0 so the rendered description is
        # not prefixed with indentation.
        description = """
The above chart averages the sentiment scores of {} stock hourly and daily.
The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
The news headlines are obtained from the FinViz website.
Sentiments are given by the nltk.sentiment.vader Python library.
""".format(ticker)
        st.write(description)
        st.table(parsed_and_scored_news)
    except Exception as e:
        # Top-level boundary: log the cause to the console and ask the user
        # for another ticker instead of surfacing a traceback in the page.
        print(str(e))
        st.write(ticker_prompt)

# NOTE(review): the style string holds only a newline here, so this call
# injects no markup - presumably CSS was meant to go between the quotes;
# confirm.
hide_streamlit_style = """
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)