Spaces:
Sleeping
Sleeping
"""Streamlit app: scrape news headlines for a chosen domain and show a
predicted sentiment for each one, using either a BERT model or a
logistic-regression model selected by the user.
"""
# NOTE(review): st, nltk, joblib, pickle, AutoTokenizer,
# AutoModelForSequenceClassification, clean, lemmatize_text,
# analyze_sentiment, logistic_model and scrape_sports are all expected to
# arrive through these star imports -- confirm and replace with explicit
# imports once the exporting modules are known.
from imports import *
from data_preprocessing import *
from web_scrapper import *
from model import *

# Download necessary NLTK data
nltk.download('wordnet')
nltk.download('omw-1.4')

st.set_page_config(page_title="News Sentiment Analysis")
st.title("News Sentiment Analysis")

# Path handed to the scraper for every domain.
DRIVER_PATH = 'chromedriver.exe'

# Listing page scraped for each selectable domain. Insertion order is also
# the order in which the options are shown in the radio widget.
DOMAIN_URLS = {
    "Sports": "https://www.indiatoday.in/search/sports",
    "Tech": "https://indianexpress.com/section/technology/",
    "Politics": "https://indianexpress.com/section/political-pulse/",
    "Entertainment": "https://indianexpress.com/section/entertainment/",
    "Business": "https://indianexpress.com/section/business/",
}


def _show_result(title, link, sentiment):
    """Write one headline, its link and its predicted sentiment to the page."""
    st.write(f"Title: {title}")
    st.write(f"Link: {link}")
    st.write(f"Predicted Sentiment: {sentiment}")
    st.write("-----")


model_used = st.radio("Select model for analysis", ["Bert(Preferred)", "Logistic"])
use_logistic = model_used == "Logistic"

if use_logistic:
    # NOTE(review): presumably this (re)builds the artifacts loaded below --
    # confirm against the definition of logistic_model.
    logistic_model()
    # Load the trained model and preprocessing objects.
    # SECURITY: joblib/pickle loading executes code embedded in the file;
    # only load artifacts produced by this project.
    logreg = joblib.load('logreg_model.pkl')
    with open('vectorizer.pkl', 'rb') as handle:
        vectorizer = pickle.load(handle)
    with open('label_encoder.pkl', 'rb') as handle:
        label_encoder = pickle.load(handle)
else:
    model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)

# index=None leaves the widget unselected, so nothing is scraped until the
# user picks a domain.
domain = st.radio(
    "Select Domain of news for analysis",
    list(DOMAIN_URLS),
    index=None,
)

if domain is not None:
    # NOTE(review): scrape_sports is called for every domain, as in the
    # original; presumably it is a generic scraper despite its name --
    # confirm against web_scrapper.
    titles, links = scrape_sports(url=DOMAIN_URLS[domain], driver_path=DRIVER_PATH)

    if use_logistic:
        # Clean and preprocess titles
        cleaned_titles = [clean(title) for title in titles]
        lemmatized_titles = [lemmatize_text(title) for title in cleaned_titles]
        transformed_titles = vectorizer.transform(lemmatized_titles)

        # Make predictions for all titles in one batch
        predictions = logreg.predict(transformed_titles)
        predicted_labels = label_encoder.inverse_transform(predictions)

        for title, link, sentiment in zip(titles, links, predicted_labels):
            _show_result(title, link, sentiment)
    else:
        # Score and display each title as it is processed
        for title, link in zip(titles, links):
            sentiment = analyze_sentiment(title, tokenizer, model)
            _show_result(title, link, sentiment)