# Spaces: Running
import html

import nltk
import streamlit as st
import torch
import trafilatura
from nltk.tokenize import sent_tokenize
from transformers import AlbertTokenizer, AlbertForSequenceClassification
# Set Streamlit layout to wide. This is issued before anything else so it
# stays the first Streamlit command even if the cached loader below shows
# a spinner while it runs.
st.set_page_config(layout="wide")


@st.cache_resource
def _load_resources():
    """Download the NLTK sentence data and load the tokenizer and model.

    Streamlit re-executes this script on every widget interaction; caching
    the loader means the download and the ``from_pretrained`` calls run once
    per server process instead of once per rerun.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from "dejanseo/good-vibes".
    """
    # Sentence-tokenizer data used by ``sent_tokenize``.
    # NOTE(review): newer NLTK releases may require 'punkt_tab' instead of
    # 'punkt' -- confirm against the pinned NLTK version.
    nltk.download('punkt')
    loaded_tokenizer = AlbertTokenizer.from_pretrained("dejanseo/good-vibes")
    loaded_model = AlbertForSequenceClassification.from_pretrained("dejanseo/good-vibes")
    return loaded_tokenizer, loaded_model


# Module-level names kept so the rest of the script can keep using them.
tokenizer, model = _load_resources()
def classify_and_highlight(text, max_length=512):
    """Classify each sentence of *text* and highlight the "Good Vibes" ones.

    The text is split into sentences, each sentence is classified on its
    own, and sentences predicted as label 0 ("Good Vibes") are wrapped in a
    green ``<span>`` whose opacity grows with the model's confidence.

    Args:
        text: Raw text to analyse. Empty or ``None`` yields an empty string.
        max_length: Maximum number of tokens per sentence passed to the
            tokenizer; longer sentences are truncated.

    Returns:
        An HTML string with all sentences joined by single spaces. Sentence
        text is HTML-escaped because callers render the result with
        ``unsafe_allow_html=True`` and the text may come from arbitrary
        web pages.
    """
    if not text:
        return ""

    fragments = []
    for sentence in sent_tokenize(text):
        # Tokenize and classify each sentence separately.
        inputs = tokenizer(
            sentence,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=max_length,
        )
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = model(**inputs)
        softmax_scores = torch.softmax(outputs.logits, dim=-1)
        prediction = torch.argmax(softmax_scores, dim=-1).item()
        confidence = softmax_scores[0][prediction].item() * 100

        # Only &, < and > need escaping: the sentence is placed in element
        # content, never inside an attribute value.
        safe_sentence = html.escape(sentence, quote=False)

        if prediction == 0:  # Label_0 corresponds to "Good Vibes"
            # Opacity scales with confidence above 50%, plus a 10% base;
            # clamped so a low-confidence winner never yields an
            # out-of-range alpha value.
            opacity = min(1.0, max(0.0, (confidence - 50) / 100 + 0.1))
            fragments.append(
                f'<span style="background-color: rgba(0, 255, 0, {opacity});" '
                f'title="{confidence:.2f}%">{safe_sentence}</span>'
            )
        else:
            fragments.append(safe_sentence)

    return " ".join(fragments).strip()
def extract_content_from_url(url):
    """Fetch *url* with Trafilatura and return its extracted content.

    Returns ``None`` when the download yields nothing; otherwise returns
    whatever ``trafilatura.extract`` produces for the downloaded page.
    """
    page = trafilatura.fetch_url(url)
    if not page:
        return None
    return trafilatura.extract(page)
# Streamlit app layout

# Footer shown beneath every successful classification.
_FOOTER_TEXT = (
    "This is a custom sentiment classification model developed by "
    "[Dejan Marketing](https://dejanmarketing.com/). "
    "If you'd like to do a large-scale sentiment analysis on your website "
    "or discuss your needs with our team, please "
    "[book an appointment here](https://dejanmarketing.com/conference/)."
)


def _show_classification(source_text):
    """Classify *source_text* and render the highlighted result and footer."""
    st.markdown(classify_and_highlight(source_text), unsafe_allow_html=True)
    st.markdown("---")
    st.write(_FOOTER_TEXT)


st.title("Good Vibes Detector - SEO by DEJAN")
st.write("This app detects good vibes on the internet.")

mode = st.radio("Choose input mode", ("Paste text", "Enter URL"))

if mode == "Paste text":
    user_text = st.text_area("Paste your text here:")
    if st.button("Classify"):
        if not user_text:
            st.write("Please paste some text.")
        else:
            _show_classification(user_text)
elif mode == "Enter URL":
    user_url = st.text_input("Enter the URL:")
    if st.button("Extract and Classify"):
        if not user_url:
            st.write("Please enter a URL.")
        else:
            content = extract_content_from_url(user_url)
            if not content:
                st.write("Failed to extract content from the URL.")
            else:
                _show_classification(content)