Spaces:
Running
Running
File size: 5,417 Bytes
3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 10bfc4e 3069ab1 c53d975 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import streamlit as st
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import plotly
import plotly.express as px
import json # for graph plotting in website
# NLTK VADER for sentiment analysis
from dateutil import parser
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import subprocess
import os
import datetime
# Page-level Streamlit settings: browser-tab title and full-width layout.
st.set_page_config(
    layout="wide",
    page_title="Stock News Sentiment Analyzer",
)
def get_news(ticker, timeout=10):
    """Fetch the FinViz quote page for *ticker* and return its news table.

    Args:
        ticker: Stock symbol appended to the module-level ``finviz_url``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The element with ``id='news-table'`` from the parsed page, or
        ``None`` when the page contains no such element.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` is a
            subclass) or times out while connecting.
    """
    # Local import: the file's top-level imports only pull in urllib.request.
    from urllib.parse import quote

    # Percent-encode the user-supplied symbol so that spaces or other
    # reserved characters cannot produce a malformed URL.
    url = finviz_url + quote(ticker, safe='')
    req = Request(
        url=url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'},
    )
    # The context manager closes the connection even if parsing raises.
    with urlopen(req, timeout=timeout) as response:
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        html = BeautifulSoup(response, 'html.parser')
    # Find 'news-table' in the Soup and hand it back to the caller.
    return html.find(id='news-table')
# parse news into dataframe
def parse_news(news_table):
    """Turn the scraped news table into a DataFrame of timestamped headlines.

    Each ``<tr>`` is expected to carry a timestamp in its first ``<td>`` and
    the headline text in its first ``<a>``. Rows that lack either, or whose
    timestamp cannot be parsed, are reported on stdout and skipped.

    Args:
        news_table: Parsed HTML element exposing ``find_all('tr')``.

    Returns:
        pandas.DataFrame with columns ``datetime`` and ``headline``.

    Raises:
        AttributeError: If *news_table* is ``None`` (no table was found).
    """
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    # Date applied to rows that only show a time. It starts as today and is
    # replaced whenever a row carries its own date, so time-only rows inherit
    # the date of the closest preceding dated row.
    # NOTE(review): assumes the table prints the date once per day followed
    # by time-only rows - confirm against the live page.
    current_date = today
    parsed_news = []
    for row in news_table.find_all('tr'):
        try:
            # Get the headline text
            headline = row.a.get_text()
            # Get the date and time tokens from the first <td> tag
            date_scrape = row.td.text.strip().split()
            if len(date_scrape) == 1:
                # Only a time is present: reuse the last date seen.
                time_str = date_scrape[0]
            else:
                date_token = date_scrape[0]
                # NOTE(review): a literal "Today" token is presumed to mean
                # the current date; dateutil cannot interpret the word.
                current_date = today if date_token.lower() == 'today' else date_token
                time_str = date_scrape[1]
            # Parse the combined date and time using dateutil.parser
            datetime_parsed = parser.parse(f"{current_date} {time_str}")
        except (AttributeError, IndexError, ValueError, OverflowError) as e:
            # Missing <a>/<td>, empty cell, or unparseable timestamp.
            print("Error parsing news:", e)
            continue
        parsed_news.append([datetime_parsed, headline])
    # Convert the list to a DataFrame
    return pd.DataFrame(parsed_news, columns=['datetime', 'headline'])
def score_news(parsed_news_df):
    """Attach VADER polarity scores to every headline.

    Args:
        parsed_news_df: DataFrame with ``datetime`` and ``headline`` columns.

    Returns:
        The input joined with one column per VADER score, indexed by
        ``datetime``, with the ``compound`` column renamed to
        ``sentiment_score``.
    """
    analyzer = SentimentIntensityAnalyzer()
    # One dict of polarity scores per headline, in row order.
    polarity_rows = [
        analyzer.polarity_scores(headline)
        for headline in parsed_news_df['headline']
    ]
    polarity_df = pd.DataFrame(polarity_rows)
    # Positional join of headlines and scores, then index by timestamp.
    combined = parsed_news_df.join(polarity_df, rsuffix='_right').set_index('datetime')
    return combined.rename(columns={"compound": "sentiment_score"})
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per hour.

    Args:
        parsed_and_scored_news: Scored headlines indexed by datetime.
        ticker: Symbol used as the chart-title prefix.

    Returns:
        The Plotly figure, ready to be displayed in the Streamlit app.
    """
    # Keep only the numeric columns; text columns cannot be averaged.
    numeric_only = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    # Bucket the rows into hourly bins and average each bin.
    hourly_means = numeric_only.resample('h').mean()
    chart_title = ticker + ' Hourly Sentiment Scores'
    return px.bar(
        hourly_means,
        x=hourly_means.index,
        y='sentiment_score',
        title=chart_title,
    )
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per calendar day.

    Args:
        parsed_and_scored_news: Scored headlines indexed by datetime.
        ticker: Symbol used as the chart-title prefix.

    Returns:
        The Plotly figure, ready to be displayed in the Streamlit app.
    """
    # Keep only the numeric columns; text columns cannot be averaged.
    numeric_only = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    # Bucket the rows into daily bins and average each bin.
    daily_means = numeric_only.resample('D').mean()
    chart_title = ticker + ' Daily Sentiment Scores'
    return px.bar(
        daily_means,
        x=daily_means.index,
        y='sentiment_score',
        title=chart_title,
    )
# Base URL of the FinViz quote page; get_news() appends the ticker symbol.
finviz_url = 'https://finviz.com/quote.ashx?t='

# Message shown when there is nothing to display (no input or a failed run).
ticker_prompt = "Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter."

# Explanatory text rendered under the charts; "{}" is filled with the ticker.
description_template = """
The above chart averages the sentiment scores of {} stock hourly and daily.
The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
The news headlines are obtained from the FinViz website.
Sentiments are given by the nltk.sentiment.vader Python library.
"""

st.header("Stock News Sentiment Analyzer")
# Strip surrounding whitespace so " aapl " is treated the same as "AAPL".
ticker = st.text_input('Enter Stock Ticker', '').strip().upper()
# NOTE(review): this frame is not read anywhere in the visible code; kept so
# the module-level name stays available.
df = pd.DataFrame({'datetime': datetime.datetime.now(), 'ticker': ticker}, index = [0])

if not ticker:
    # Nothing entered yet (e.g. first page load): prompt instead of sending
    # a request with an empty symbol.
    st.write(ticker_prompt)
else:
    try:
        st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
        news_table = get_news(ticker)
        parsed_news_df = parse_news(news_table)
        print(parsed_news_df)
        parsed_and_scored_news = score_news(parsed_news_df)
        fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
        fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)
        st.plotly_chart(fig_hourly)
        st.plotly_chart(fig_daily)
        description = description_template.format(ticker)
        st.write(description)
        st.table(parsed_and_scored_news)
    except Exception as e:
        # Top-level boundary: log the cause to the server console and show
        # the user a generic prompt rather than a traceback.
        print(str(e))
        st.write(ticker_prompt)

# CSS that hides Streamlit's default menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)