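# NOTE: the next three lines are hosting-page residue captured with the source, not program code.
# Spaces:
# Running
# Running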
import datetime
import html
from urllib.request import urlopen, Request

import pandas as pd
import plotly.express as px
import requests
import streamlit as st
from bs4 import BeautifulSoup
from dateutil import parser
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
# Page-level settings: browser tab title and a full-width layout.
st.set_page_config(
    page_title="Stock News Confidence Analyzer ",
    layout="wide",
)
# Initialize FinBERT (yiyanghkust/finbert-tone) pipeline only once and cache
@st.cache_resource
def load_model():
    """Build the FinBERT (yiyanghkust/finbert-tone) sentiment-analysis pipeline.

    The function is wrapped in ``st.cache_resource`` so the model and
    tokenizer are constructed once and reused; without the decorator the
    module-level call below would rebuild them every time the script runs.

    Returns:
        A ``transformers`` "sentiment-analysis" pipeline backed by the
        three-label FinBERT classifier and its tokenizer.
    """
    model = BertForSequenceClassification.from_pretrained(
        'yiyanghkust/finbert-tone', num_labels=3
    )
    tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
    return pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)


# Shared pipeline instance used by score_news().
finbert = load_model()
def verify_link(url, timeout=10, retries=3):
    """Check whether *url* answers a HEAD request with a 2xx status.

    Args:
        url: Address to probe.
        timeout: Seconds passed to ``requests.head`` for each attempt.
        retries: Maximum number of attempts.

    Returns:
        ``True`` as soon as one attempt yields a status in [200, 300);
        ``False`` when every attempt either raised a
        ``requests.RequestException`` or returned another status.
    """
    for _attempt in range(retries):
        try:
            reply = requests.head(url, timeout=timeout, allow_redirects=True)
        except requests.RequestException:
            # Failed attempt: move on to the next one.
            continue
        succeeded = 200 <= reply.status_code < 300
        if succeeded:
            return True
    return False
def get_news(ticker):
    """Download the FinViz quote page for *ticker* and return its news table.

    Args:
        ticker: Symbol appended to the FinViz quote URL.

    Returns:
        The element whose ``id`` is ``'news-table'`` in the parsed page, or
        ``None`` when the page has no such element or when fetching/parsing
        raised (in which case the error text is shown with ``st.write``).
    """
    try:
        finviz_url = 'https://finviz.com/quote.ashx?t='
        req = Request(url=finviz_url + ticker, headers={'User-Agent': 'Mozilla/5.0'})
        # The timeout keeps a stalled connection from hanging the app; the
        # context manager closes the HTTP response once it has been parsed.
        with urlopen(req, timeout=10) as response:
            page = BeautifulSoup(response, 'html.parser')
        return page.find(id='news-table')
    except Exception as e:
        st.write("Error fetching news:", str(e))
        return None
def parse_news(news_table):
    """Convert the scraped news table into a DataFrame of headlines.

    Each ``<tr>`` that has both a link and a timestamp cell contributes one
    record. The timestamp cell is split on whitespace: two tokens are read as
    "<date> <time>", one token as a time only.

    Args:
        news_table: Parsed table element exposing ``find_all('tr')``.

    Returns:
        DataFrame with columns ``datetime``, ``headline``, ``link`` and
        ``is_valid`` (the result of ``verify_link`` for that link). Rows that
        fail to parse are reported with ``print`` and skipped.
    """
    parsed_news = []
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    # Date taken from the most recent row that carried one.
    # NOTE(review): assumes the site prints the date only on the first
    # headline of each day and that following time-only rows belong to that
    # same day - confirm against the current page markup.
    current_date = None
    for row in news_table.find_all('tr'):
        if row.a is None or row.td is None:
            # Not a headline row (no link or no timestamp cell).
            continue
        try:
            text = row.a.get_text()
            link = row.a['href']
            date_scrape = row.td.text.strip().split()
            if len(date_scrape) == 1:
                # Time only: inherit the last seen date, falling back to
                # today when no dated row has been seen yet.
                date = current_date or today
                time_part = date_scrape[0]
            else:
                date, time_part = date_scrape[0], date_scrape[1]
                if date.lower() == 'today':
                    # A literal "Today" label cannot be parsed as a date.
                    date = today
                current_date = date
            datetime_parsed = parser.parse(f"{date} {time_part}")
            is_valid = verify_link(link)
            parsed_news.append([datetime_parsed, text, link, is_valid])
        except Exception as e:
            # Best effort: one malformed row must not abort the whole table.
            print("Error parsing news:", e)
            continue
    columns = ['datetime', 'headline', 'link', 'is_valid']
    return pd.DataFrame(parsed_news, columns=columns)
def score_news(parsed_news_df):
    """Attach a FinBERT confidence score to every headline.

    Headlines are sent to the ``finbert`` pipeline ten at a time and only the
    ``'score'`` field of each prediction is kept.

    Args:
        parsed_news_df: DataFrame with at least ``headline`` and ``datetime``
            columns.

    Returns:
        The input rows plus a ``confidence`` column, indexed by ``datetime``.
    """
    chunk = 10  # headlines per pipeline call
    news = parsed_news_df.reset_index(drop=True)
    headlines = news['headline'].tolist()

    confidence_scores = []
    for start in range(0, len(news), chunk):
        results = finbert(headlines[start:start + chunk])
        # Only the confidence value is used; the predicted label is dropped.
        confidence_scores.extend(result['score'] for result in results)

    scored = pd.concat(
        [news, pd.DataFrame({'confidence': confidence_scores})],
        axis=1,
    )
    return scored.set_index('datetime')
def plot_hourly_confidence(parsed_and_scored_news, ticker):
    """Plot the mean confidence score per hour as a line chart.

    Args:
        parsed_and_scored_news: Frame with a ``confidence`` column whose index
            supports ``resample('h')``.
        ticker: Symbol used in the chart title.

    Returns:
        The Plotly figure, with the y-axis titled "Confidence Score" and
        fixed to the range 0-1.
    """
    hourly = parsed_and_scored_news['confidence'].resample('h').mean()
    chart = px.line(
        hourly,
        x=hourly.index,
        y='confidence',
        title=f'{ticker} Hourly Confidence Scores',
        color_discrete_sequence=['blue'],
    )
    chart.update_layout(yaxis={'title': "Confidence Score", 'range': [0, 1]})
    return chart
def plot_daily_confidence(parsed_and_scored_news, ticker):
    """Plot the mean confidence score per day as a line chart.

    Args:
        parsed_and_scored_news: Frame with a ``confidence`` column whose index
            supports ``resample('D')``.
        ticker: Symbol used in the chart title.

    Returns:
        The Plotly figure, with the y-axis titled "Confidence Score" and
        fixed to the range 0-1.
    """
    daily = parsed_and_scored_news['confidence'].resample('D').mean()
    chart = px.line(
        daily,
        x=daily.index,
        y='confidence',
        title=f'{ticker} Daily Confidence Scores',
        color_discrete_sequence=['blue'],
    )
    chart.update_layout(yaxis={'title': "Confidence Score", 'range': [0, 1]})
    return chart
def get_recommendation(confidence_scores):
    """Turn the average confidence score into a short advisory sentence.

    Args:
        confidence_scores: DataFrame with a ``confidence`` column.

    Returns:
        A "High" message when the mean is at least 0.7, "Moderate" when it is
        at least 0.4, "Low" otherwise. When the column holds no usable values
        a "no scores" message is returned instead of reporting a ``nan``
        average as low confidence.
    """
    scores = confidence_scores['confidence']
    if scores.count() == 0:
        # Empty or all-missing column: the mean would be NaN.
        return "No confidence scores available, so no recommendation can be made."

    avg_confidence = scores.mean()
    if avg_confidence >= 0.7:
        return f"High confidence in predictions (Confidence Score: {avg_confidence:.2f}). The news analysis suggests a strong trend. Consider acting based on the news."
    if avg_confidence >= 0.4:
        return f"Moderate confidence in predictions (Confidence Score: {avg_confidence:.2f}). The trend is somewhat clear. Be cautious and consider consulting other sources."
    return f"Low confidence in predictions (Confidence Score: {avg_confidence:.2f}). The news does not strongly indicate a particular trend. Consider waiting for more clear signals."
def _render_link_cell(row):
    """Return the HTML anchor shown in the table's link column for *row*."""
    label = "Valid✅" if row["is_valid"] else "Invalid❌"
    # The href comes from scraped markup; escape it so it cannot break out of
    # the attribute when the table is rendered with unsafe_allow_html.
    href = html.escape(str(row["link"]), quote=True)
    return f'<a href="{href}" target="_blank">{label} Link</a>'


st.header("Stock News Confidence Analyzer (HKUST FinBERT)")
ticker = st.text_input('Enter Stock Ticker', '').strip().upper()

if not ticker:
    # Nothing entered yet: prompt instead of requesting a page for an empty symbol.
    st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")
else:
    try:
        st.subheader(f"Confidence Analysis and Recommendation for {ticker} Stock")
        news_table = get_news(ticker)
        parsed_news_df = parse_news(news_table) if news_table is not None else None
        if parsed_news_df is None or parsed_news_df.empty:
            # No table found, or no row could be parsed: there is nothing to
            # score or plot.
            st.write("No news available or invalid ticker symbol.")
        else:
            parsed_and_scored_news = score_news(parsed_news_df)
            # Generate and display recommendation based on confidence scores
            recommendation = get_recommendation(parsed_and_scored_news)
            st.write(recommendation)
            # Display a disclaimer
            st.warning("Disclaimer: This recommendation is based solely on the confidence of the sentiment analysis. Always do your own research and consult with a qualified financial advisor before making investment decisions.")
            fig_hourly = plot_hourly_confidence(parsed_and_scored_news, ticker)
            fig_daily = plot_daily_confidence(parsed_and_scored_news, ticker)
            st.plotly_chart(fig_hourly)
            st.plotly_chart(fig_daily)
            description = f"""
The above charts display the average confidence scores of {ticker} stock hourly and daily.
The table below shows recent headlines with their confidence scores.
The news headlines are obtained from the FinViz website.
Confidence scores indicate how certain the model is about its sentiment predictions.
Links have been verified for validity.
"""
            st.write(description)
            display_df = parsed_and_scored_news[['headline', 'confidence', 'link']].copy()
            # Headlines are scraped text; escape them because the table is
            # emitted as raw HTML below.
            display_df['headline'] = display_df['headline'].map(
                lambda headline: html.escape(str(headline))
            )
            display_df['link'] = parsed_and_scored_news.apply(_render_link_cell, axis=1)
            st.write(display_df.to_html(escape=False), unsafe_allow_html=True)
    except Exception as e:
        # Top-level boundary: log the detail, show a generic hint in the UI.
        print(str(e))
        st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")
# CSS injected into the page: sets the #MainMenu and footer elements to
# visibility: hidden.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(body=hide_streamlit_style, unsafe_allow_html=True)