|
import streamlit as st |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from sklearn.feature_extraction.text import CountVectorizer |
|
import seaborn as sns |
|
import plotly.express as px |
|
import plotly.io as pio |
|
import plotly.graph_objects as go |
|
|
|
|
|
# Use Streamlit's wide layout so the dashboard spans the full browser width.
# Streamlit requires this to be the first Streamlit command executed.
st.set_page_config(layout="wide")
|
|
|
|
|
def load_and_clean_data():
    """Load the four review CSV files, clean each one, and combine them.

    Cleaning applied to every file:
      * the Domain label "MUSLIM" is normalised to "Muslim";
      * rows whose Domain is "Not relevant" or "None" are removed;
      * rows whose Discrimination or Sentiment is "None" are removed.

    Returns:
        pandas.DataFrame: the cleaned frames concatenated in file order,
        re-indexed 0..n-1 so index labels are unique across source files.

    Raises:
        FileNotFoundError: if any of the CSV files is missing.
        KeyError: if a file lacks the Domain, Discrimination or Sentiment
            column.
    """
    paths = (
        "data/reviewed_social_media_english.csv",
        "data/reviewed_news_english.csv",
        "data/tamil_social_media.csv",
        "data/tamil_news.csv",
    )

    cleaned = []
    for path in paths:
        frame = pd.read_csv(path)

        # Assign the result back to the column: calling replace(inplace=True)
        # on frame['Domain'] acts on an intermediate Series and is not
        # guaranteed to update the frame itself.
        frame["Domain"] = frame["Domain"].replace("MUSLIM", "Muslim")

        # NOTE(review): recent pandas versions parse the literal text "None"
        # as a missing value by default in read_csv, in which case the
        # "None" comparisons below never match -- confirm against the
        # installed pandas version and the CSV contents.
        keep = (
            ~frame["Domain"].isin(["Not relevant", "None"])
            & (frame["Discrimination"] != "None")
            & (frame["Sentiment"] != "None")
        )
        cleaned.append(frame[keep])

    # ignore_index avoids repeated row labels from the four separate files.
    return pd.concat(cleaned, ignore_index=True)
|
|
|
# Load the cleaned, combined dataset once for the whole app.
# All cleaning (label normalisation and removal of irrelevant / unlabelled
# rows) is done inside load_and_clean_data(). It must not be repeated at
# module level: the per-file frames df1..df4 exist only inside that function,
# and re-concatenating raw frames here would discard the cleaning.
df = load_and_clean_data()
|
|
|
|
|
def create_visualizations(df):
    """Placeholder hook for page visualizations; currently does nothing.

    Args:
        df: dataset the visualizations would be built from (unused for now).

    Returns:
        None.
    """
|
|
|
|
|
# Sidebar navigation; the selected page name is stored in the module-level
# `page` variable.
page = st.sidebar.selectbox(
    "Choose a page",
    ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"],
)

# Every page currently routes to the same placeholder call, so one call
# replaces four identical if/elif branches (the selectbox always returns one
# of the options above). Branch on `page` here once pages diverge.
create_visualizations(df)
|
|
|
|
|
|
|
|
|
|
|
# Colour sequence (Plotly's sequential Viridis scale) applied to chart markers.
color_palette = px.colors.sequential.Viridis
|
|
|
|
|
def create_domain_distribution_chart(df):
    """Build a donut chart showing the share of rows per Domain value.

    Args:
        df: DataFrame with a 'Domain' column.

    Returns:
        The Plotly figure, with a centred title, compact margins, and slice
        colours taken from the module-level ``color_palette``.
    """
    # hole=0.35 turns the pie into a donut.
    fig = px.pie(df, names='Domain', title='Distribution of Domains', hole=0.35)
    fig.update_layout(
        title_x=0.5,
        margin=dict(l=20, r=20, t=30, b=20),
        legend=dict(x=0.1, y=1),
    )
    fig.update_traces(marker=dict(colors=color_palette))
    return fig
|
|
|
|
|
def create_sentiment_distribution_chart(df):
    """Build a bar chart of the number of rows per Sentiment value.

    Args:
        df: DataFrame with a 'Sentiment' column.

    Returns:
        The Plotly figure with compact margins.
    """
    # The figure has to be created before it can be configured; previously
    # `fig` was referenced without ever being assigned.
    fig = px.histogram(
        df,
        x='Sentiment',
        color='Sentiment',
        title='Distribution of Sentiments',
    )
    fig.update_layout(margin=dict(l=20, r=20, t=40, b=20))
    return fig
|
|
|
|
|
|
|
|
|
def create_channel_sentiment_over_time_chart(df):
    """Build a line chart of monthly row counts per Channel and Sentiment.

    Args:
        df: DataFrame with 'Date', 'Channel' and 'Sentiment' columns. 'Date'
            must be parseable by ``pandas.to_datetime``. The frame is not
            modified.

    Returns:
        The Plotly figure: one line per (Channel, Sentiment) pair, coloured
        by Channel and dash-styled by Sentiment.
    """
    # Parse dates on a copy so the caller's (possibly filtered) frame is not
    # mutated.
    dated = df.assign(Date=pd.to_datetime(df['Date']))
    month = dated['Date'].dt.to_period('M')

    # Count rows per month/channel/sentiment. unstack(fill_value=0) gives an
    # explicit 0 for sentiments missing from a given month/channel pair.
    wide = dated.groupby([month, 'Channel', 'Sentiment']).size().unstack(fill_value=0)

    # Reshape to long form (one row per month/channel/sentiment) so Plotly can
    # map Channel to colour and Sentiment to dash style; this also works when
    # some sentiment labels are absent from the data.
    monthly = wide.reset_index().melt(
        id_vars=['Date', 'Channel'],
        var_name='Sentiment',
        value_name='Count',
    )
    # Plotly needs timestamps rather than pandas Period objects on the axis.
    monthly['Date'] = monthly['Date'].dt.to_timestamp()

    fig = px.line(monthly, x='Date', y='Count', color='Channel', line_dash='Sentiment')
    fig.update_layout(
        title='Channel-wise Sentiment Over Time',
        margin=dict(l=20, r=20, t=40, b=20),
    )
    return fig
|
|
|
|
|
def create_channel_discrimination_chart(df):
    """Build a grouped bar chart of discriminative vs. non-discriminative rows per Channel.

    Args:
        df: DataFrame with 'Channel' and 'Discrimination' columns.

    Returns:
        The Plotly figure with one bar group per Channel and one bar for each
        of the 'Discriminative' and 'Non-Discriminative' labels.
    """
    labels = ['Discriminative', 'Non-Discriminative']

    channel_discrimination = (
        df.groupby(['Channel', 'Discrimination'])
        .size()
        .unstack(fill_value=0)
        # Guarantee both plotted columns exist (as zeros) even when a label is
        # absent from the data, e.g. after it is deselected in a filter.
        .reindex(columns=labels, fill_value=0)
    )

    fig = px.bar(
        channel_discrimination,
        x=channel_discrimination.index,
        y=labels,
        barmode='group',
    )
    fig.update_layout(
        title='Channel-wise Distribution of Discriminative Content',
        margin=dict(l=20, r=20, t=40, b=20),
    )
    return fig
|
|
|
|
|
def render_dashboard(data=None):
    """Render the charts for the page currently selected in the sidebar.

    Reads the module-level ``page`` value. Only the "Overview" page draws
    anything here: the domain and sentiment distribution charts side by side.

    Args:
        data: DataFrame to plot. When omitted, the module-level ``df`` is
            used, so zero-argument calls keep working.
    """
    if data is None:
        data = df

    if page == "Overview":
        st.header("Overview of Domains and Sentiments")
        # st.columns is the current name of the former st.beta_columns API,
        # which is no longer available in current Streamlit releases.
        left, right = st.columns(2)
        with left:
            st.plotly_chart(create_domain_distribution_chart(data))
        with right:
            st.plotly_chart(create_sentiment_distribution_chart(data))
|
|
|
|
|
|
|
|
|
|
|
# Sidebar filters: one multiselect per categorical column, offering every
# distinct value found in the data and selecting all of them by default.
domain_options = df['Domain'].unique()
channel_options = df['Channel'].unique()
sentiment_options = df['Sentiment'].unique()
discrimination_options = df['Discrimination'].unique()

domain_filter = st.sidebar.multiselect(
    'Select Domain', options=domain_options, default=domain_options
)
channel_filter = st.sidebar.multiselect(
    'Select Channel', options=channel_options, default=channel_options
)
sentiment_filter = st.sidebar.multiselect(
    'Select Sentiment', options=sentiment_options, default=sentiment_options
)
discrimination_filter = st.sidebar.multiselect(
    'Select Discrimination', options=discrimination_options, default=discrimination_options
)

# Keep only the rows whose value is selected in all four filters.
df_filtered = df[
    df['Domain'].isin(domain_filter)
    & df['Channel'].isin(channel_filter)
    & df['Sentiment'].isin(sentiment_filter)
    & df['Discrimination'].isin(discrimination_filter)
]

# Draw the selected page from the filtered rows.
render_dashboard(df_filtered)
|
|