"""Streamlit app: scrape FinViz headlines for a ticker and chart VADER sentiment."""

import datetime
import json
import os
import subprocess
from urllib.parse import quote
from urllib.request import Request, urlopen

import nltk
import pandas as pd
import plotly
import plotly.express as px  # for graph plotting in website
import streamlit as st
from bs4 import BeautifulSoup
from dateutil import parser

# NLTK VADER for sentiment analysis; the lexicon must be available before
# SentimentIntensityAnalyzer is instantiated in score_news().
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

st.set_page_config(page_title = "Stock News Sentiment Analyzer", layout = "wide")

# for extracting data from finviz
finviz_url = 'https://finviz.com/quote.ashx?t='


def get_news(ticker):
    """Fetch the FinViz quote page for *ticker* and return its news table.

    Args:
        ticker: Stock symbol typed by the user; it is percent-encoded before
            being appended to ``finviz_url``.

    Returns:
        The element with ``id='news-table'`` from the parsed page, or ``None``
        when the page contains no such element.

    Raises:
        Whatever ``urlopen`` raises for network or HTTP failures; the caller
        at the bottom of this script catches these.
    """
    # The ticker is user input, so escape it rather than splicing it raw
    # into the request URL.
    url = finviz_url + quote(ticker, safe='')
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    # BeautifulSoup consumes the response in its constructor, so the
    # connection can be closed as soon as the soup is built.
    with urlopen(req) as response:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        html = BeautifulSoup(response, 'html.parser')
    # Find 'news-table' in the Soup and hand it back to the caller
    return html.find(id='news-table')


# parse news into dataframe
def parse_news(news_table):
    """Convert the scraped news table into a DataFrame of timestamped headlines.

    The first cell of each row holds either ``<date> <time>`` or only
    ``<time>``. A time-only row reuses the date of the closest preceding row
    that carried one; if no dated row has been seen yet, today's date is used.
    A date token of ``Today`` (any case) is replaced by the current date.

    Rows that lack a link or a parseable timestamp are reported on stdout and
    skipped.

    Args:
        news_table: The element returned by ``get_news``.

    Returns:
        A DataFrame with columns ``datetime`` and ``headline``.
    """
    parsed_news = []
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    last_date = None  # date token of the most recent successfully parsed row

    for row in news_table.find_all('tr'):
        try:
            # Get the headline text
            headline = row.a.get_text()
            # Get the date and time from the first cell
            stamp = row.td.text.strip().split()

            if len(stamp) == 1:
                # Only a time is present: inherit the date seen last.
                date_part = last_date if last_date is not None else today
                time_part = stamp[0]
            else:
                date_part, time_part = stamp[0], stamp[1]
                if date_part.lower() == 'today':
                    date_part = today

            # Parse the date and time using dateutil.parser
            datetime_parsed = parser.parse(f"{date_part} {time_part}")
        except (AttributeError, IndexError, ValueError, OverflowError) as e:
            print("Error parsing news:", e)
            continue

        # Only remember a date once it has been shown to parse, so a single
        # malformed row cannot poison the time-only rows that follow it.
        last_date = date_part
        parsed_news.append([datetime_parsed, headline])

    # Convert the list to a DataFrame
    columns = ['datetime', 'headline']
    return pd.DataFrame(parsed_news, columns=columns)


def score_news(parsed_news_df):
    """Attach VADER polarity scores to every headline.

    Args:
        parsed_news_df: DataFrame with ``datetime`` and ``headline`` columns,
            as produced by ``parse_news``.

    Returns:
        The input joined with the polarity-score columns, indexed by
        ``datetime``, with ``compound`` renamed to ``sentiment_score``.
    """
    # Instantiate the sentiment intensity analyzer
    vader = SentimentIntensityAnalyzer()

    # One dict of polarity scores per headline
    scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()

    # Convert the 'scores' list of dicts into a DataFrame
    scores_df = pd.DataFrame(scores)

    # Join the news and the scores row by row on their shared positional index
    parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
    parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
    parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})

    return parsed_and_scored_news


def _plot_mean_sentiment(parsed_and_scored_news, rule, title):
    """Bar-chart the mean ``sentiment_score`` per *rule*-sized time bucket."""
    # Ensure that only numeric columns are resampled
    numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])

    # Bucket rows by the datetime index and average each bucket
    mean_scores = numeric_cols.resample(rule).mean()

    # Plot a bar chart with Plotly
    return px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=title)


def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Return a Plotly bar chart of the hourly mean sentiment score.

    Args:
        parsed_and_scored_news: Datetime-indexed frame from ``score_news``.
        ticker: Symbol used as the prefix of the chart title.

    Returns:
        The figure, for the caller to display in the Streamlit app.
    """
    return _plot_mean_sentiment(parsed_and_scored_news, 'h', ticker + ' Hourly Sentiment Scores')


def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Return a Plotly bar chart of the daily mean sentiment score.

    Args:
        parsed_and_scored_news: Datetime-indexed frame from ``score_news``.
        ticker: Symbol used as the prefix of the chart title.

    Returns:
        The figure, for the caller to display in the Streamlit app.
    """
    return _plot_mean_sentiment(parsed_and_scored_news, 'D', ticker + ' Daily Sentiment Scores')


st.header("Stock News Sentiment Analyzer")

ticker = st.text_input('Enter Stock Ticker', '').upper()

# NOTE(review): `df` is not read anywhere in this script; kept in case
# something outside this file relies on it — confirm and remove.
df = pd.DataFrame({'datetime': datetime.datetime.now(), 'ticker': ticker}, index = [0])

try:
    st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
    news_table = get_news(ticker)
    parsed_news_df = parse_news(news_table)
    print(parsed_news_df)
    parsed_and_scored_news = score_news(parsed_news_df)
    fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
    fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

    st.plotly_chart(fig_hourly)
    st.plotly_chart(fig_daily)

    description = (
        " The above chart averages the sentiment scores of {} stock hourly and daily."
        " The table below gives each of the most recent headlines of the stock and the"
        " negative, neutral, positive and an aggregated sentiment score."
        " The news headlines are obtained from the FinViz website."
        " Sentiments are given by the nltk.sentiment.vader Python library. "
    ).format(ticker)

    st.write(description)
    st.table(parsed_and_scored_news)

except Exception as e:
    # Top-level boundary: any scrape/parse/plot failure (including an empty
    # or unknown ticker) is logged and turned into a prompt for the user.
    print(str(e))
    st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")

hide_streamlit_style = """ """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)