Spaces:
Running
Running
Akshayram1
committed on
Commit
•
e6a496c
1
Parent(s):
7ddc5f5
Update app.py
Browse files
app.py
CHANGED
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import plotly
import plotly.express as px
import json  # for graph plotting in website
# NLTK VADER for sentiment analysis
from dateutil import parser
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import subprocess
import os
import datetime

# Configure the Streamlit page. NOTE(review): the call was truncated in this
# capture; title/layout reconstructed from the later revision of this file.
st.set_page_config(page_title="Stock News Sentiment Analyzer", layout="wide")
22 |
def get_news(ticker):
    """Fetch the FinViz quote page for *ticker* and return its news table.

    Returns the BeautifulSoup element with id 'news-table', or None when the
    page has no such element. Uses the module-level `finviz_url` prefix.
    """
    url = finviz_url + ticker
    # A browser-like User-Agent is required: FinViz rejects default urllib UAs.
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    response = urlopen(req)
    # Name the parser explicitly: bare BeautifulSoup(response) raises a
    # GuessedAtParserWarning and may pick different parsers across machines.
    html = BeautifulSoup(response, 'html.parser')
    # Find 'news-table' in the Soup and load it into 'news_table'
    news_table = html.find(id='news-table')
    return news_table
31 |
-
|
32 |
-
# parse news into dataframe
|
33 |
-
|
34 |
|
35 |
def parse_news(news_table):
    """Parse each <tr> of the FinViz news table into a DataFrame.

    Returns a DataFrame with columns ['datetime', 'headline', 'link'].
    Rows that fail to parse are logged and skipped.
    """
    parsed_news = []

    for x in news_table.findAll('tr'):
        try:
            # Get the headline text and link
            text = x.a.get_text()
            link = x.a['href']
            # Get the date and time from the first <td> tag
            date_scrape = x.td.text.strip().split()

            # Handle cases where only time is present (same day as prior row)
            if len(date_scrape) == 1:
                date = datetime.datetime.today().strftime('%Y-%m-%d')
                time = date_scrape[0]
            else:
                date = date_scrape[0]
                time = date_scrape[1]

            # Parse the date and time using dateutil.parser
            datetime_str = f"{date} {time}"
            datetime_parsed = parser.parse(datetime_str)

            # Collect the parsed row (this append was lost in the captured
            # diff; without it the function returned an empty DataFrame).
            parsed_news.append([datetime_parsed, text, link])

        except Exception as e:
            print("Error parsing news:", e)
            continue

    columns = ['datetime', 'headline', 'link']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)

    return parsed_news_df
70 |
-
|
71 |
-
|
72 |
-
|
73 |
def score_news(parsed_news_df):
    """Score every headline with NLTK VADER and index the result by datetime.

    The VADER 'compound' column is renamed to 'sentiment_score'; the other
    polarity columns (neg/neu/pos) are kept as-is.
    """
    vader = SentimentIntensityAnalyzer()

    # One polarity dict per headline, materialized as a DataFrame.
    score_rows = [vader.polarity_scores(headline) for headline in parsed_news_df['headline']]

    scored = (
        parsed_news_df
        .join(pd.DataFrame(score_rows), rsuffix='_right')
        .set_index('datetime')
        .rename(columns={"compound": "sentiment_score"})
    )
    return scored
89 |
|
90 |
-
|
91 |
-
|
92 |
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Return a Plotly bar chart of the mean hourly sentiment_score."""
    # Resampling only makes sense on numeric data, so drop everything else.
    numeric = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    hourly_means = numeric.resample('h').mean()

    chart_title = ticker + ' Hourly Sentiment Scores'
    return px.bar(hourly_means, x=hourly_means.index, y='sentiment_score', title=chart_title)
102 |
|
103 |
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Return a Plotly bar chart of the mean daily sentiment_score."""
    # Resampling only makes sense on numeric data, so drop everything else.
    numeric = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    daily_means = numeric.resample('D').mean()

    chart_title = ticker + ' Daily Sentiment Scores'
    return px.bar(daily_means, x=daily_means.index, y='sentiment_score', title=chart_title)
113 |
-
|
114 |
-
|
115 |
-
# for extracting data from finviz
finviz_url = 'https://finviz.com/quote.ashx?t='

st.header("Stock News Sentiment Analyzer")

ticker = st.text_input('Enter Stock Ticker', '').upper()

df = pd.DataFrame({'datetime': datetime.datetime.now(), 'ticker': ticker}, index=[0])

try:
    # NOTE(review): the subheader/description format calls were truncated in
    # this capture and are reconstructed from the later revision.
    st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
    news_table = get_news(ticker)
    parsed_news_df = parse_news(news_table)
    parsed_and_scored_news = score_news(parsed_news_df)
    fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
    fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

    st.plotly_chart(fig_hourly)
    st.plotly_chart(fig_daily)

    description = """
        The above chart averages the sentiment scores of {} stock hourly and daily.
        The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
        The news headlines are obtained from the FinViz website.
        Sentiments are given by the nltk.sentiment.vader Python library.
        """.format(ticker)

    st.write(description)

    st.write(parsed_and_scored_news.to_html(escape=False), unsafe_allow_html=True)

except Exception as e:
    # Surface the failure in the app UI instead of only the server log.
    st.error("Could not load sentiment for '{}': {}".format(ticker, e))
|
|
# --- Imports (stdlib first, then third-party) -------------------------------
import datetime
from urllib.request import urlopen, Request

import pandas as pd
import plotly.express as px
import requests
from bs4 import BeautifulSoup
from dateutil import parser

import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Streamlit page configuration must happen before any other st.* call.
st.set_page_config(page_title="Stock News Sentiment Analyzer", layout="wide")
+
def verify_link(url):
    """Return True when *url* answers an HTTP HEAD request successfully.

    Follows redirects and accepts any non-error (< 400) status: many news
    sites answer HEAD with 301/302/204 rather than a plain 200, which the
    previous strict `== 200` check wrongly flagged as invalid.
    Network failures and timeouts return False rather than raising.
    """
    try:
        response = requests.head(url, timeout=5, allow_redirects=True)
        return response.status_code < 400
    except requests.RequestException:
        return False
21 |
|
22 |
def get_news(ticker):
    """Download the FinViz quote page for *ticker* and return its news-table element.

    Returns None when the page contains no element with id 'news-table'.
    """
    finviz_url = 'https://finviz.com/quote.ashx?t='
    # FinViz rejects the default urllib User-Agent, so present a browser one.
    request = Request(
        url=finviz_url + ticker,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'},
    )
    page = BeautifulSoup(urlopen(request), 'html.parser')
    return page.find(id='news-table')
|
|
|
|
|
|
30 |
|
31 |
def parse_news(news_table):
    """Parse each <tr> of the FinViz news table into a DataFrame.

    Returns a DataFrame with columns ['datetime', 'headline', 'link',
    'is_valid'], where is_valid is the reachability result of verify_link.
    Rows that fail to parse are logged and skipped.
    """
    parsed_news = []

    for x in news_table.findAll('tr'):
        try:
            text = x.a.get_text()
            link = x.a['href']
            # First <td> holds either "time" (same day as prior row) or
            # "date time".
            date_scrape = x.td.text.strip().split()

            if len(date_scrape) == 1:
                date = datetime.datetime.today().strftime('%Y-%m-%d')
                time = date_scrape[0]
            else:
                # The else branch was lost in the captured diff; restored so
                # "date time" rows are handled.
                date = date_scrape[0]
                time = date_scrape[1]

            datetime_str = f"{date} {time}"
            datetime_parsed = parser.parse(datetime_str)

            # NOTE(review): one HTTP round-trip per headline — slow for long
            # tables; consider caching or batching link verification.
            is_valid = verify_link(link)

            parsed_news.append([datetime_parsed, text, link, is_valid])

        except Exception as e:
            print("Error parsing news:", e)
            continue

    columns = ['datetime', 'headline', 'link', 'is_valid']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)

    return parsed_news_df
62 |
+
|
|
|
|
|
63 |
def score_news(parsed_news_df):
    """Score every headline with NLTK VADER and index the result by datetime.

    The VADER 'compound' column is renamed to 'sentiment_score'; the other
    polarity columns (neg/neu/pos) are kept as-is.
    """
    vader = SentimentIntensityAnalyzer()

    # One polarity dict per headline, materialized as a DataFrame.
    score_rows = [vader.polarity_scores(headline) for headline in parsed_news_df['headline']]

    scored = (
        parsed_news_df
        .join(pd.DataFrame(score_rows), rsuffix='_right')
        .set_index('datetime')
        .rename(columns={"compound": "sentiment_score"})
    )
    return scored
73 |
|
|
|
|
|
74 |
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Return a Plotly bar chart of the mean hourly sentiment_score."""
    # Resampling only makes sense on numeric data, so drop everything else.
    numeric = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    hourly_means = numeric.resample('h').mean()

    chart_title = f'{ticker} Hourly Sentiment Scores'
    return px.bar(hourly_means, x=hourly_means.index, y='sentiment_score', title=chart_title)
|
|
|
|
79 |
|
80 |
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Return a Plotly bar chart of the mean daily sentiment_score."""
    # Resampling only makes sense on numeric data, so drop everything else.
    numeric = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    daily_means = numeric.resample('D').mean()

    chart_title = f'{ticker} Daily Sentiment Scores'
    return px.bar(daily_means, x=daily_means.index, y='sentiment_score', title=chart_title)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
st.header("Stock News Sentiment Analyzer")

ticker = st.text_input('Enter Stock Ticker', '').upper()

# Only fetch once the user has actually typed a ticker; with the empty
# default the original code requested the bare FinViz URL and failed
# silently into the except branch.
if ticker:
    try:
        st.subheader(f"Hourly and Daily Sentiment of {ticker} Stock")
        news_table = get_news(ticker)
        parsed_news_df = parse_news(news_table)
        parsed_and_scored_news = score_news(parsed_news_df)
        fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
        fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

        st.plotly_chart(fig_hourly)
        st.plotly_chart(fig_daily)

        description = f"""
        The above chart averages the sentiment scores of {ticker} stock hourly and daily.
        The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
        The news headlines are obtained from the FinViz website.
        Sentiments are given by the nltk.sentiment.vader Python library.
        Links have been verified for validity.
        """

        st.write(description)

        # Render each link as a clickable anchor labelled by its
        # verify_link result, then drop the raw boolean column.
        parsed_and_scored_news['link'] = parsed_and_scored_news.apply(
            lambda row: f'<a href="{row["link"]}" target="_blank">{"Valid" if row["is_valid"] else "Invalid"} Link</a>',
            axis=1
        )

        st.write(parsed_and_scored_news.drop(columns=['is_valid']).to_html(escape=False), unsafe_allow_html=True)

    except Exception as e:
        # Surface the failure in the app UI instead of only the server log.
        st.error(f"Could not load sentiment for '{ticker}': {e}")