financial_sentiment_analysis

Running on CPU Upgrade

File size: 3,953 Bytes

8c497f4
dcb533f
 
 
 
 
 
 
 
 
9e7bdda
 
9800fd5
9e7bdda
dcb533f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26f5171
dcb533f
b508635
dcb533f
 
8c497f4
 
dcb533f
ed9dbd0
dcb533f
 
 
 
 
 
 
 
 
 
 
 
 
7e815f0
dcb533f
 
 
 
 
 
 
 
7e815f0
dcb533f
 
 
7e815f0
ed9dbd0
dcb533f
 
7e815f0
dcb533f
 
7e815f0
280021e
862c4f1
5be3942
280021e
862c4f1
6091de8
280021e
6091de8
 
280021e
6091de8
 
280021e
5be3942
 
280021e
862c4f1
 
 
 
 
7e815f0
 
 
 
 
 
 
 
 
280021e

import gradio as gr
from transformers import pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn import metrics
import pandas as pd
from transformers.utils import logging

# Restrict transformers' own logging to the ERROR level.
logging.set_verbosity("ERROR")

# Read the labelled dataset from disk.
file_path = 'data.csv'
df = pd.read_csv(file_path)

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df['Sentence'],
    df['Sentiment'],
    test_size=0.2,
    random_state=42,
)

# One TF-IDF + classifier pipeline per classical model.
nb_model = make_pipeline(TfidfVectorizer(), MultinomialNB())
# probability=True is needed because predict_proba is called on this model.
svm_model = make_pipeline(TfidfVectorizer(), SVC(probability=True))
rf_model = make_pipeline(TfidfVectorizer(), RandomForestClassifier())

# Fit every pipeline on the training split, in the order defined above.
for _model in (nb_model, svm_model, rf_model):
    _model.fit(X_train, y_train)

# Example sentences offered to the user for analysis.
sentences = [
    "The announced restructuring will erase the company's indebtedness.",
    "UPM-Kymmene upgraded to `in-line' from `underperform' by Goldman Sachs.",
    "Profitability (in EBIT %) was not impressive due to expenses rising by 14.3%.",
    "The Finnish bank has issued a profit warning.",
    (
        "TeliaSonera's underlying results however included 457 mln SKr in "
        "positive one-offs, hence the adjusted underlying EBITDA actually "
        "amounts to 7.309 bln SKr, clearly below expectations, analysts said."
    ),
]

# Translate the BERT model's star ratings into sentiment classes
def map_bert_label(label):
    """Map a star-rating label to "negative", "neutral" or "positive".

    One and two stars count as negative, three stars as neutral, and four
    and five stars as positive. Any other label yields None.
    """
    star_groups = (
        (("1 star", "2 stars"), "negative"),
        (("3 stars",), "neutral"),
        (("4 stars", "5 stars"), "positive"),
    )
    for star_labels, sentiment in star_groups:
        if label in star_labels:
            return sentiment
    return None

# Transformers checkpoints to run, keyed by the name shown in the results.
_TRANSFORMER_MODEL_PATHS = {
    "BERT": "nlptown/bert-base-multilingual-uncased-sentiment",
}

# Pipelines that have already been built, keyed by checkpoint path.
_ANALYZER_CACHE = {}


def _get_sentiment_analyzer(model_path):
    """Return the sentiment pipeline for *model_path*, building it at most once.

    Constructing a pipeline loads the whole checkpoint, so it is cached
    instead of being rebuilt on every request.
    """
    analyzer = _ANALYZER_CACHE.get(model_path)
    if analyzer is None:
        analyzer = pipeline("sentiment-analysis", model=model_path)
        _ANALYZER_CACHE[model_path] = analyzer
    return analyzer


def _sklearn_result(model, sentence):
    """Return the predicted label and the highest class probability.

    NOTE(review): scikit-learn documents that, for SVC(probability=True),
    predict_proba may be inconsistent with predict, so for the SVM the score
    is not guaranteed to belong to the reported label.
    """
    return {
        "label": model.predict([sentence])[0],
        "score": model.predict_proba([sentence]).max(),
    }


# Function to analyze sentiment
def analyze_sentiment(sentence):
    """Run every model on *sentence* and collect their predictions.

    Args:
        sentence: Text to classify. None or an empty string means that
            nothing is selected.

    Returns:
        A ``(sentence, results)`` tuple. ``results`` maps a model name
        ("BERT", "Naive Bayes", "SVM", "Random Forest") to a dict with
        "label" and "score" keys. For a missing or empty sentence the
        result is ``("", {})`` and no model is run.
    """
    # Nothing selected: skip inference instead of failing on None[:512].
    if not sentence:
        return "", {}

    # Analyze sentiment using transformers models
    results = {}
    for model_name, model_path in _TRANSFORMER_MODEL_PATHS.items():
        sentiment_analyzer = _get_sentiment_analyzer(model_path)
        # Only the first 512 characters are passed to the model.
        result = sentiment_analyzer(sentence[:512])[0]
        if model_name == "BERT":
            # This checkpoint reports star ratings; convert to sentiment.
            result['label'] = map_bert_label(result['label'])
        results[model_name] = result

    # Analyze sentiment using sklearn models
    results["Naive Bayes"] = _sklearn_result(nb_model, sentence)
    results["SVM"] = _sklearn_result(svm_model, sentence)
    results["Random Forest"] = _sklearn_result(rf_model, sentence)

    return sentence, results


# Custom CSS that overrides font sizes inside the Gradio container.
# Every size below is under 1rem (i.e. smaller than the root font size) and
# each declaration is marked !important. Headings and labels get their own
# sizes; preformatted text in the markdown output gets the smallest one.
custom_css = """
.gradio-container, .gradio-container * {
    font-size: 0.65rem !important;
}
.gradio-container h1 {
    font-size: 1.1rem !important;
}
.gradio-container h2, .gradio-container h3 {
    font-size: 0.9rem !important;
}
.gradio-container .label {
    font-size: 0.75rem !important;
}
.gradio-container .output-markdown pre {
    font-size: 0.6rem !important;
}
"""

# Build the Gradio UI: picking a sentence runs every model on it.
with gr.Blocks(css=custom_css) as demo:
    # Page heading and a short usage hint.
    gr.Markdown("# Compare Sentiment Analysis Across Models")
    gr.Markdown("Select a sentence to see sentiment analysis results from multiple models.")

    # Input: one of the predefined example sentences.
    dropdown = gr.Dropdown(label="Select Sentence", choices=sentences)

    # Outputs: the chosen sentence echoed back, plus the per-model results.
    text_output = gr.Textbox(lines=2, label="Selected Sentence")
    sentiment_output = gr.JSON(label="Sentiment Scores")

    # Re-run the analysis whenever the selection changes.
    dropdown.change(
        fn=analyze_sentiment,
        inputs=[dropdown],
        outputs=[text_output, sentiment_output],
    )

# Start serving the app.
demo.launch()