Akshayram1 committed on
Commit
e6a496c
1 Parent(s): 7ddc5f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -65
app.py CHANGED
@@ -2,48 +2,41 @@ import streamlit as st
2
  from urllib.request import urlopen, Request
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
5
- import plotly
6
  import plotly.express as px
7
- import json # for graph plotting in website
8
- # NLTK VADER for sentiment analysis
9
  from dateutil import parser
10
  import nltk
11
  nltk.downloader.download('vader_lexicon')
12
  from nltk.sentiment.vader import SentimentIntensityAnalyzer
13
-
14
- import subprocess
15
- import os
16
-
17
  import datetime
 
18
 
19
- st.set_page_config(page_title = "Stock News Sentiment Analyzer", layout = "wide")
20
 
 
 
 
 
 
 
21
 
22
  def get_news(ticker):
 
23
  url = finviz_url + ticker
24
- req = Request(url=url,headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
25
  response = urlopen(req)
26
- # Read the contents of the file into 'html'
27
- html = BeautifulSoup(response)
28
- # Find 'news-table' in the Soup and load it into 'news_table'
29
  news_table = html.find(id='news-table')
30
  return news_table
31
-
32
- # parse news into dataframe
33
-
34
 
35
  def parse_news(news_table):
36
  parsed_news = []
37
 
38
  for x in news_table.findAll('tr'):
39
  try:
40
- # Get the headline text and link
41
  text = x.a.get_text()
42
  link = x.a['href']
43
- # Get the date and time from the first <td> tag
44
  date_scrape = x.td.text.strip().split()
45
 
46
- # Handle cases where only time is present
47
  if len(date_scrape) == 1:
48
  date = datetime.datetime.today().strftime('%Y-%m-%d')
49
  time = date_scrape[0]
@@ -51,83 +44,53 @@ def parse_news(news_table):
51
  date = date_scrape[0]
52
  time = date_scrape[1]
53
 
54
- # Parse the date and time using dateutil.parser
55
  datetime_str = f"{date} {time}"
56
  datetime_parsed = parser.parse(datetime_str)
57
 
58
- # Append the parsed news to the list
59
- parsed_news.append([datetime_parsed, text, link])
 
60
 
61
  except Exception as e:
62
  print("Error parsing news:", e)
63
  continue
64
 
65
- # Convert the list to a DataFrame
66
- columns = ['datetime', 'headline', 'link']
67
  parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
68
 
69
  return parsed_news_df
70
-
71
-
72
-
73
  def score_news(parsed_news_df):
74
- # Instantiate the sentiment intensity analyzer
75
  vader = SentimentIntensityAnalyzer()
76
 
77
- # Iterate through the headlines and get the polarity scores using vader
78
  scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()
79
-
80
- # Convert the 'scores' list of dicts into a DataFrame
81
  scores_df = pd.DataFrame(scores)
82
-
83
- # Join the DataFrames of the news and the list of dicts
84
  parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
85
  parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
86
  parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
87
 
88
  return parsed_and_scored_news
89
 
90
-
91
-
92
  def plot_hourly_sentiment(parsed_and_scored_news, ticker):
93
- # Ensure that only numeric columns are resampled
94
  numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
95
-
96
- # Group by date and ticker columns from scored_news and calculate the mean
97
  mean_scores = numeric_cols.resample('h').mean()
98
-
99
- # Plot a bar chart with Plotly
100
- fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Hourly Sentiment Scores')
101
- return fig # Return the figure to display in the Streamlit app
102
 
103
  def plot_daily_sentiment(parsed_and_scored_news, ticker):
104
- # Ensure that only numeric columns are resampled
105
  numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
106
-
107
- # Group by date and ticker columns from scored_news and calculate the mean
108
  mean_scores = numeric_cols.resample('D').mean()
109
-
110
- # Plot a bar chart with Plotly
111
- fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Daily Sentiment Scores')
112
- return fig # Return the figure to display in the Streamlit app
113
-
114
-
115
- # for extracting data from finviz
116
- finviz_url = 'https://finviz.com/quote.ashx?t='
117
-
118
 
119
  st.header("Stock News Sentiment Analyzer")
120
 
121
  ticker = st.text_input('Enter Stock Ticker', '').upper()
122
 
123
- df = pd.DataFrame({'datetime': datetime.datetime.now(), 'ticker': ticker}, index = [0])
124
-
125
-
126
  try:
127
- st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
128
  news_table = get_news(ticker)
129
  parsed_news_df = parse_news(news_table)
130
- print(parsed_news_df)
131
  parsed_and_scored_news = score_news(parsed_news_df)
132
  fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
133
  fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)
@@ -135,20 +98,22 @@ try:
135
  st.plotly_chart(fig_hourly)
136
  st.plotly_chart(fig_daily)
137
 
138
- description = """
139
- The above chart averages the sentiment scores of {} stock hourly and daily.
140
  The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
141
  The news headlines are obtained from the FinViz website.
142
  Sentiments are given by the nltk.sentiment.vader Python library.
143
- """.format(ticker)
 
144
 
145
  st.write(description)
146
 
147
- # Convert links to clickable HTML
148
- parsed_and_scored_news['link'] = parsed_and_scored_news['link'].apply(lambda x: f'<a href="{x}" target="_blank">Link</a>')
 
 
149
 
150
- # Display the table with the new link column
151
- st.write(parsed_and_scored_news.to_html(escape=False), unsafe_allow_html=True)
152
 
153
  except Exception as e:
154
  print(str(e))
 
2
  from urllib.request import urlopen, Request
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
 
5
  import plotly.express as px
 
 
6
  from dateutil import parser
7
  import nltk
8
  nltk.downloader.download('vader_lexicon')
9
  from nltk.sentiment.vader import SentimentIntensityAnalyzer
 
 
 
 
10
  import datetime
11
+ import requests
12
 
13
+ st.set_page_config(page_title="Stock News Sentiment Analyzer", layout="wide")
14
 
15
def verify_link(url):
    """Return True if *url* is reachable, False otherwise.

    Uses a HEAD request so the page body is never downloaded. Redirects are
    followed explicitly: ``requests.head`` defaults to ``allow_redirects=False``,
    so the original code reported every shortened / publisher-redirect link
    (301/302) as invalid even though it works in a browser.
    """
    try:
        response = requests.head(url, timeout=5, allow_redirects=True)
        # Accept any non-error status — some sites answer HEAD with 204 or
        # other 2xx/3xx codes rather than a strict 200.
        return response.status_code < 400
    except requests.RequestException:
        # Timeouts / connection failures count as "not verified".
        return False
21
 
22
def get_news(ticker):
    """Fetch the FinViz news table for *ticker*.

    Returns the BeautifulSoup element with id ``news-table`` (or None if
    FinViz serves a page without one, e.g. for an unknown ticker).
    """
    finviz_url = 'https://finviz.com/quote.ashx?t='
    url = finviz_url + ticker
    # FinViz rejects requests without a browser-like User-Agent.
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    # Context manager closes the HTTP connection deterministically — the
    # original left the urlopen response unclosed (resource leak).
    with urlopen(req) as response:
        html = BeautifulSoup(response, 'html.parser')
    news_table = html.find(id='news-table')
    return news_table
 
 
 
30
 
31
def parse_news(news_table):
    """Parse a FinViz news table into a DataFrame.

    Returns a DataFrame with columns ``datetime``, ``headline``, ``link``,
    ``is_valid``. Rows that fail to parse are skipped with a console message
    (best-effort: one malformed row must not abort the whole table).
    """
    parsed_news = []
    link_validity = {}  # cache: one verification round-trip per unique URL

    for row in news_table.findAll('tr'):
        try:
            text = row.a.get_text()
            link = row.a['href']
            date_scrape = row.td.text.strip().split()

            # FinViz omits the date on rows from the same day as the previous
            # row: a single token is just the time.
            if len(date_scrape) == 1:
                date = datetime.datetime.today().strftime('%Y-%m-%d')
                time = date_scrape[0]
            else:
                date = date_scrape[0]
                time = date_scrape[1]

            datetime_str = f"{date} {time}"
            datetime_parsed = parser.parse(datetime_str)

            # Memoize verify_link: the original issued a blocking HTTP request
            # for every headline, re-verifying duplicate URLs each time.
            if link not in link_validity:
                link_validity[link] = verify_link(link)
            is_valid = link_validity[link]

            parsed_news.append([datetime_parsed, text, link, is_valid])

        except Exception as e:
            print("Error parsing news:", e)
            continue

    columns = ['datetime', 'headline', 'link', 'is_valid']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)

    return parsed_news_df
62
+
 
 
63
def score_news(parsed_news_df):
    """Attach VADER sentiment scores to every headline.

    Returns *parsed_news_df* indexed by ``datetime`` with the VADER score
    columns joined on and ``compound`` renamed to ``sentiment_score``.
    """
    analyzer = SentimentIntensityAnalyzer()

    # One polarity dict per headline -> one row each in the scores frame.
    polarity_rows = [analyzer.polarity_scores(headline)
                     for headline in parsed_news_df['headline']]
    polarity_df = pd.DataFrame(polarity_rows)

    parsed_and_scored_news = (
        parsed_news_df
        .join(polarity_df, rsuffix='_right')
        .set_index('datetime')
        .rename(columns={"compound": "sentiment_score"})
    )

    return parsed_and_scored_news
73
 
 
 
74
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Bar chart of mean sentiment per hour for *ticker*."""
    # Resampling is only meaningful for the numeric score columns.
    numeric = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    hourly_means = numeric.resample('h').mean()
    return px.bar(
        hourly_means,
        x=hourly_means.index,
        y='sentiment_score',
        title=f'{ticker} Hourly Sentiment Scores',
    )
 
 
79
 
80
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Bar chart of mean sentiment per day for *ticker*."""
    # Resampling is only meaningful for the numeric score columns.
    numeric = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    daily_means = numeric.resample('D').mean()
    return px.bar(
        daily_means,
        x=daily_means.index,
        y='sentiment_score',
        title=f'{ticker} Daily Sentiment Scores',
    )
 
 
 
 
 
 
 
85
 
86
st.header("Stock News Sentiment Analyzer")

ticker = st.text_input('Enter Stock Ticker', '').upper()

# Guard: the original ran the whole pipeline on an empty ticker, which
# failed with a confusing exception printed only to the server console.
if not ticker:
    st.info("Enter a stock ticker above to see its news sentiment.")
    st.stop()

try:
    st.subheader(f"Hourly and Daily Sentiment of {ticker} Stock")
    news_table = get_news(ticker)
    parsed_news_df = parse_news(news_table)
    parsed_and_scored_news = score_news(parsed_news_df)
    fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
    fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

    st.plotly_chart(fig_hourly)
    st.plotly_chart(fig_daily)

    description = f"""
    The above chart averages the sentiment scores of {ticker} stock hourly and daily.
    The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
    The news headlines are obtained from the FinViz website.
    Sentiments are given by the nltk.sentiment.vader Python library.
    Links have been verified for validity.
    """

    st.write(description)

    # Render each link as a clickable anchor labelled by its verification status.
    parsed_and_scored_news['link'] = parsed_and_scored_news.apply(
        lambda row: f'<a href="{row["link"]}" target="_blank">{"Valid" if row["is_valid"] else "Invalid"} Link</a>',
        axis=1
    )

    st.write(parsed_and_scored_news.drop(columns=['is_valid']).to_html(escape=False), unsafe_allow_html=True)

except Exception as e:
    # Surface the failure in the UI — the original only printed to the server
    # console, leaving the web page blank on any error.
    st.error(f"Could not fetch or analyze news for '{ticker}': {e}")
    print(str(e))