Spaces:
Running
Running
Akshayram1
committed on
Commit
•
e6a496c
1
Parent(s):
7ddc5f5
Update app.py
Browse files
app.py
CHANGED
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import plotly
import plotly.express as px
import json  # for graph plotting in website
# NLTK VADER for sentiment analysis
from dateutil import parser
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import subprocess
import os
import datetime

# Configure the Streamlit page. NOTE(review): the call was truncated in this
# capture; title/layout reconstructed from the later revision of this file.
st.set_page_config(page_title="Stock News Sentiment Analyzer", layout="wide")
22 |
def get_news(ticker):
    """Fetch the FinViz quote page for *ticker* and return its news table.

    Returns the BeautifulSoup element with id 'news-table', or None when the
    page has no such element. Uses the module-level `finviz_url` prefix.
    """
    url = finviz_url + ticker
    # A browser-like User-Agent is required: FinViz rejects default urllib UAs.
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    response = urlopen(req)
    # Name the parser explicitly: bare BeautifulSoup(response) raises a
    # GuessedAtParserWarning and may pick different parsers across machines.
    html = BeautifulSoup(response, 'html.parser')
    # Find 'news-table' in the Soup and load it into 'news_table'
    news_table = html.find(id='news-table')
    return news_table
31 |
-
|
32 |
-
# parse news into dataframe
|
33 |
-
|
34 |
|
35 |
def parse_news(news_table):
    """Parse each <tr> of the FinViz news table into a DataFrame.

    Returns a DataFrame with columns ['datetime', 'headline', 'link'].
    Rows that fail to parse are logged and skipped.
    """
    parsed_news = []

    for x in news_table.findAll('tr'):
        try:
            # Get the headline text and link
            text = x.a.get_text()
            link = x.a['href']
            # Get the date and time from the first <td> tag
            date_scrape = x.td.text.strip().split()

            # Handle cases where only time is present (same day as prior row)
            if len(date_scrape) == 1:
                date = datetime.datetime.today().strftime('%Y-%m-%d')
                time = date_scrape[0]
            else:
                date = date_scrape[0]
                time = date_scrape[1]

            # Parse the date and time using dateutil.parser
            datetime_str = f"{date} {time}"
            datetime_parsed = parser.parse(datetime_str)

            # Collect the parsed row (this append was lost in the captured
            # diff; without it the function returned an empty DataFrame).
            parsed_news.append([datetime_parsed, text, link])

        except Exception as e:
            print("Error parsing news:", e)
            continue

    columns = ['datetime', 'headline', 'link']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)

    return parsed_news_df
70 |
-
|
71 |
-
|
72 |
-
|
73 |
def score_news(parsed_news_df):
    """Score every headline with NLTK VADER and index the result by datetime.

    The VADER 'compound' column is renamed to 'sentiment_score'; the other
    polarity columns (neg/neu/pos) are kept as-is.
    """
    vader = SentimentIntensityAnalyzer()

    # One polarity dict per headline, materialized as a DataFrame.
    score_rows = [vader.polarity_scores(headline) for headline in parsed_news_df['headline']]

    scored = (
        parsed_news_df
        .join(pd.DataFrame(score_rows), rsuffix='_right')
        .set_index('datetime')
        .rename(columns={"compound": "sentiment_score"})
    )
    return scored
89 |
|
90 |
-
|
91 |
-
|
92 |
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Return a Plotly bar chart of the mean hourly sentiment_score."""
    # Resampling only makes sense on numeric data, so drop everything else.
    numeric = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    hourly_means = numeric.resample('h').mean()

    chart_title = ticker + ' Hourly Sentiment Scores'
    return px.bar(hourly_means, x=hourly_means.index, y='sentiment_score', title=chart_title)
102 |
|
103 |
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Return a Plotly bar chart of the mean daily sentiment_score."""
    # Resampling only makes sense on numeric data, so drop everything else.
    numeric = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    daily_means = numeric.resample('D').mean()

    chart_title = ticker + ' Daily Sentiment Scores'
    return px.bar(daily_means, x=daily_means.index, y='sentiment_score', title=chart_title)
113 |
-
|
114 |
-
|
115 |
-
# for extracting data from finviz
finviz_url = 'https://finviz.com/quote.ashx?t='

st.header("Stock News Sentiment Analyzer")

ticker = st.text_input('Enter Stock Ticker', '').upper()

df = pd.DataFrame({'datetime': datetime.datetime.now(), 'ticker': ticker}, index=[0])

try:
    # NOTE(review): the subheader/description format calls were truncated in
    # this capture and are reconstructed from the later revision.
    st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
    news_table = get_news(ticker)
    parsed_news_df = parse_news(news_table)
    parsed_and_scored_news = score_news(parsed_news_df)
    fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
    fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

    st.plotly_chart(fig_hourly)
    st.plotly_chart(fig_daily)

    description = """
        The above chart averages the sentiment scores of {} stock hourly and daily.
        The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
        The news headlines are obtained from the FinViz website.
        Sentiments are given by the nltk.sentiment.vader Python library.
        """.format(ticker)

    st.write(description)

    st.write(parsed_and_scored_news.to_html(escape=False), unsafe_allow_html=True)

except Exception as e:
    # Surface the failure in the app UI instead of only the server log.
    st.error("Could not load sentiment for '{}': {}".format(ticker, e))
|
|
# --- Imports (stdlib first, then third-party) -------------------------------
import datetime
from urllib.request import urlopen, Request

import pandas as pd
import plotly.express as px
import requests
from bs4 import BeautifulSoup
from dateutil import parser

import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Streamlit page configuration must happen before any other st.* call.
st.set_page_config(page_title="Stock News Sentiment Analyzer", layout="wide")
+
def verify_link(url):
    """Return True when *url* answers an HTTP HEAD request successfully.

    Follows redirects and accepts any non-error (< 400) status: many news
    sites answer HEAD with 301/302/204 rather than a plain 200, which the
    previous strict `== 200` check wrongly flagged as invalid.
    Network failures and timeouts return False rather than raising.
    """
    try:
        response = requests.head(url, timeout=5, allow_redirects=True)
        return response.status_code < 400
    except requests.RequestException:
        return False
21 |
|
22 |
def get_news(ticker):
    """Download the FinViz quote page for *ticker* and return its news-table element.

    Returns None when the page contains no element with id 'news-table'.
    """
    finviz_url = 'https://finviz.com/quote.ashx?t='
    # FinViz rejects the default urllib User-Agent, so present a browser one.
    request = Request(
        url=finviz_url + ticker,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'},
    )
    page = BeautifulSoup(urlopen(request), 'html.parser')
    return page.find(id='news-table')
|
|
|
|
|
|
30 |
|
31 |
def parse_news(news_table):
    """Parse each <tr> of the FinViz news table into a DataFrame.

    Returns a DataFrame with columns ['datetime', 'headline', 'link',
    'is_valid'], where is_valid is the reachability result of verify_link.
    Rows that fail to parse are logged and skipped.
    """
    parsed_news = []

    for x in news_table.findAll('tr'):
        try:
            text = x.a.get_text()
            link = x.a['href']
            # First <td> holds either "time" (same day as prior row) or
            # "date time".
            date_scrape = x.td.text.strip().split()

            if len(date_scrape) == 1:
                date = datetime.datetime.today().strftime('%Y-%m-%d')
                time = date_scrape[0]
            else:
                # The else branch was lost in the captured diff; restored so
                # "date time" rows are handled.
                date = date_scrape[0]
                time = date_scrape[1]

            datetime_str = f"{date} {time}"
            datetime_parsed = parser.parse(datetime_str)

            # NOTE(review): one HTTP round-trip per headline — slow for long
            # tables; consider caching or batching link verification.
            is_valid = verify_link(link)

            parsed_news.append([datetime_parsed, text, link, is_valid])

        except Exception as e:
            print("Error parsing news:", e)
            continue

    columns = ['datetime', 'headline', 'link', 'is_valid']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)

    return parsed_news_df
62 |
+
|
|
|
|
|
63 |
def score_news(parsed_news_df):
    """Score every headline with NLTK VADER and index the result by datetime.

    The VADER 'compound' column is renamed to 'sentiment_score'; the other
    polarity columns (neg/neu/pos) are kept as-is.
    """
    vader = SentimentIntensityAnalyzer()

    # One polarity dict per headline, materialized as a DataFrame.
    score_rows = [vader.polarity_scores(headline) for headline in parsed_news_df['headline']]

    scored = (
        parsed_news_df
        .join(pd.DataFrame(score_rows), rsuffix='_right')
        .set_index('datetime')
        .rename(columns={"compound": "sentiment_score"})
    )
    return scored
73 |
|
|
|
|
|
74 |
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Return a Plotly bar chart of the mean hourly sentiment_score."""
    # Resampling only makes sense on numeric data, so drop everything else.
    numeric = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    hourly_means = numeric.resample('h').mean()

    chart_title = f'{ticker} Hourly Sentiment Scores'
    return px.bar(hourly_means, x=hourly_means.index, y='sentiment_score', title=chart_title)
|
|
|
|
79 |
|
80 |
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Return a Plotly bar chart of the mean daily sentiment_score."""
    # Resampling only makes sense on numeric data, so drop everything else.
    numeric = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    daily_means = numeric.resample('D').mean()

    chart_title = f'{ticker} Daily Sentiment Scores'
    return px.bar(daily_means, x=daily_means.index, y='sentiment_score', title=chart_title)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
st.header("Stock News Sentiment Analyzer")

ticker = st.text_input('Enter Stock Ticker', '').upper()

# Only fetch once the user has actually typed a ticker; with the empty
# default the original code requested the bare FinViz URL and failed
# silently into the except branch.
if ticker:
    try:
        st.subheader(f"Hourly and Daily Sentiment of {ticker} Stock")
        news_table = get_news(ticker)
        parsed_news_df = parse_news(news_table)
        parsed_and_scored_news = score_news(parsed_news_df)
        fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
        fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

        st.plotly_chart(fig_hourly)
        st.plotly_chart(fig_daily)

        description = f"""
        The above chart averages the sentiment scores of {ticker} stock hourly and daily.
        The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
        The news headlines are obtained from the FinViz website.
        Sentiments are given by the nltk.sentiment.vader Python library.
        Links have been verified for validity.
        """

        st.write(description)

        # Render each link as a clickable anchor labelled by its
        # verify_link result, then drop the raw boolean column.
        parsed_and_scored_news['link'] = parsed_and_scored_news.apply(
            lambda row: f'<a href="{row["link"]}" target="_blank">{"Valid" if row["is_valid"] else "Invalid"} Link</a>',
            axis=1
        )

        st.write(parsed_and_scored_news.drop(columns=['is_valid']).to_html(escape=False), unsafe_allow_html=True)

    except Exception as e:
        # Surface the failure in the app UI instead of only the server log.
        st.error(f"Could not load sentiment for '{ticker}': {e}")