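"""Streamlit app for news headline sentiment analysis.

Headlines are scraped from India Today / The Indian Express (via web_scrapper.py)
and scored either with a saved logistic-regression pipeline or with the
nlptown/bert-base-multilingual-uncased-sentiment model.
"""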
from imports import *
from data_preprocessing import *
from web_scrapper import *
from model import *

# Download the NLTK data needed for lemmatization in data_preprocessing
nltk.download('wordnet')
nltk.download('omw-1.4')
st.set_page_config(page_title="News Sentiment Analysis")
st.title("News Sentiment Analysis")
# Let the user choose between the BERT checkpoint and the saved logistic-regression model
model_used = st.radio("Select model for analysis", ["Bert (Preferred)", "Logistic"])
use_logistic = model_used == "Logistic"
if use_logistic:
    logistic_model()
    # Load the trained model and preprocessing objects
    logreg = joblib.load('logreg_model.pkl')
    with open('vectorizer.pkl', 'rb') as handle:
        vectorizer = pickle.load(handle)
    with open('label_encoder.pkl', 'rb') as handle:
        label_encoder = pickle.load(handle)
else:
    # Load the pretrained multilingual BERT sentiment model from Hugging Face
    model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
domain = st.radio(
    "Select Domain of news for analysis",
    ["Sports", "Tech", "Politics", "Entertainment", "Business"],
    index=None,
)
if domain is not None:
    # Scrape headlines and links for the chosen domain (all domains reuse the same
    # scraper from web_scrapper.py, driven by chromedriver.exe, with a different URL)
    if domain == "Sports":
        titles, links = scrape_sports(url="https://www.indiatoday.in/search/sports", driver_path='chromedriver.exe')
    elif domain == "Tech":
        titles, links = scrape_sports(url="https://indianexpress.com/section/technology/", driver_path='chromedriver.exe')
    elif domain == "Politics":
        titles, links = scrape_sports(url="https://indianexpress.com/section/political-pulse/", driver_path='chromedriver.exe')
    elif domain == "Entertainment":
        titles, links = scrape_sports(url="https://indianexpress.com/section/entertainment/", driver_path='chromedriver.exe')
    elif domain == "Business":
        titles, links = scrape_sports(url="https://indianexpress.com/section/business/", driver_path='chromedriver.exe')
    if use_logistic:
        # Clean, lemmatize and vectorize the titles for the logistic-regression pipeline
        cleaned_titles = [clean(title) for title in titles]
        lemmatized_titles = [lemmatize_text(title) for title in cleaned_titles]
        transformed_titles = vectorizer.transform(lemmatized_titles)

        # Make predictions and map them back to sentiment labels
        predictions = logreg.predict(transformed_titles)
        predicted_labels = label_encoder.inverse_transform(predictions)

        for title, link, sentiment in zip(titles, links, predicted_labels):
            st.write(f"Title: {title}")
            st.write(f"Link: {link}")
            st.write(f"Predicted Sentiment: {sentiment}")
            st.write("-----")
    else:
        # Score each title with the BERT model and display the results
        for title, link in zip(titles, links):
            sentiment = analyze_sentiment(title, tokenizer, model)
            st.write(f"Title: {title}")
            st.write(f"Link: {link}")
            st.write(f"Predicted Sentiment: {sentiment}")
            st.write("-----")