File size: 5,674 Bytes
21ac434 7bf8be4 93b4f33 7bf8be4 93b4f33 7bf8be4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
import seaborn as sns
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
# Set page configuration: request Streamlit's "wide" layout so the two-column
# chart rows rendered further down get more horizontal room.
st.set_page_config(layout="wide")
def load_and_clean_data():
    """Load the four review CSVs, clean their labels and combine them.

    Cleaning applied to every source file:
      * the 'Domain' value "MUSLIM" is normalized to "Muslim";
      * rows whose 'Domain' is 'Not relevant' are dropped;
      * rows whose 'Domain', 'Discrimination' or 'Sentiment' is unlabeled
        (the literal 'None', or a missing value) are dropped.

    Returns:
        A single DataFrame holding the cleaned rows of all four files, with a
        fresh 0..n-1 index.

    Raises:
        FileNotFoundError: if one of the CSV files is missing.
        KeyError: if a file lacks one of the label columns.
    """
    csv_paths = [
        "data/reviewed_social_media_english.csv",
        "data/reviewed_news_english.csv",
        "data/tamil_social_media.csv",
        "data/tamil_news.csv",
    ]
    label_columns = ['Domain', 'Discrimination', 'Sentiment']

    cleaned = []
    for path in csv_paths:
        frame = pd.read_csv(path)

        # Assign the result back instead of calling replace(inplace=True) on
        # the selected column: the chained in-place form is not guaranteed to
        # write through to the parent frame.
        frame['Domain'] = frame['Domain'].replace("MUSLIM", "Muslim")

        keep = frame['Domain'] != 'Not relevant'
        for column in label_columns:
            # Depending on the pandas version, read_csv may already have
            # parsed the literal "None" as a missing value, so treat both
            # spellings as "unlabeled".
            keep &= frame[column].notna() & (frame[column] != 'None')
        cleaned.append(frame[keep])

    # ignore_index avoids duplicate index labels across the four sources.
    return pd.concat(cleaned, ignore_index=True)
# Load the combined dataset. load_and_clean_data() already normalizes the
# 'Domain' labels, drops the irrelevant rows and concatenates the four sources,
# so no further cleaning happens at module level. (A second copy of that logic
# used to live here, but it referenced df1..df4, which only exist inside the
# function, and therefore raised NameError on startup.)
df = load_and_clean_data()
# Visualization function
def create_visualizations(df):
    """Placeholder hook for the page visualizations; currently a no-op.

    The ``df`` argument is accepted but not used yet, and nothing is returned.
    """
    # [Existing visualization code]
    return None
# Page navigation: the sidebar selectbox decides which page is shown.
page = st.sidebar.selectbox(
    "Choose a page",
    ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"],
)

# Every page currently routes to the same placeholder (overview, sentiment
# analysis, discrimination analysis and channel analysis visualizations alike),
# so a single membership test covers all four.
if page in ("Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"):
    create_visualizations(df)
# [Place the rest of the code for the visualizations here]
# Define a color palette for consistent visualization styles.
# This is Plotly's sequential Viridis scale; the domain distribution chart
# applies it to its pie slices.
color_palette = px.colors.sequential.Viridis
# Function for Domain Distribution Chart
def create_domain_distribution_chart(df):
    """Return a donut chart showing how rows are spread over 'Domain' values.

    Args:
        df: DataFrame with a 'Domain' column.

    Returns:
        The Plotly figure, with a centered title, tight margins and slices
        colored from the module-level ``color_palette``.
    """
    layout_options = {
        "title_x": 0.5,
        "margin": {"l": 20, "r": 20, "t": 30, "b": 20},
        "legend": {"x": 0.1, "y": 1},
    }
    slice_style = {"colors": color_palette}

    donut = px.pie(df, names='Domain', title='Distribution of Domains', hole=0.35)
    donut.update_layout(**layout_options)
    donut.update_traces(marker=slice_style)
    return donut
# Function for Sentiment Distribution Across Domains Chart
def create_sentiment_distribution_chart(df):
    """Return a grouped bar chart of sentiment counts for each domain.

    Args:
        df: DataFrame with 'Domain' and 'Sentiment' columns.

    Returns:
        A Plotly figure with one bar group per domain and one bar per
        sentiment label.
    """
    # NOTE(review): the original chart-building code was elided from this
    # function, leaving `fig` undefined (NameError on call). This grouped bar
    # chart is a stand-in -- confirm it matches the intended design.
    counts = (
        df.groupby(['Domain', 'Sentiment'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.bar(
        counts,
        x='Domain',
        y='Count',
        color='Sentiment',
        barmode='group',
        title='Sentiment Distribution Across Domains',
    )
    fig.update_layout(margin=dict(l=20, r=20, t=40, b=20))
    return fig
# ... [Define other chart functions following the same pattern]
# Function for Channel-wise Sentiment Over Time Chart
def create_channel_sentiment_over_time_chart(df):
    """Return a line chart of monthly row counts per channel and sentiment.

    Args:
        df: DataFrame with 'Date', 'Channel' and 'Sentiment' columns. 'Date'
            must be parseable by ``pd.to_datetime``. The frame is not modified.

    Returns:
        A Plotly figure with one line per (Channel, Sentiment) pair: line color
        encodes the channel and dash style encodes the sentiment.
    """
    # Derive the month bucket on a local Series rather than overwriting
    # df['Date'], so the caller's frame is left untouched.
    month_start = pd.to_datetime(df['Date']).dt.to_period('M').dt.to_timestamp()

    # Count rows per (month, channel, sentiment). The unstack/melt round trip
    # zero-fills sentiments that are absent for a given month and channel, and
    # yields a long-form table whose columns Plotly Express can reference by
    # name (the index levels of the grouped result cannot be used that way).
    timeline = (
        df.assign(Month=month_start)
        .groupby(['Month', 'Channel', 'Sentiment'])
        .size()
        .unstack(fill_value=0)
        .reset_index()
        .melt(id_vars=['Month', 'Channel'], var_name='Sentiment', value_name='Count')
    )

    fig = px.line(timeline, x='Month', y='Count', color='Channel', line_dash='Sentiment')
    fig.update_layout(title='Channel-wise Sentiment Over Time', margin=dict(l=20, r=20, t=40, b=20))
    return fig
# Function for Channel-wise Distribution of Discriminative Content Chart
def create_channel_discrimination_chart(df):
    """Return a grouped bar chart of discrimination labels for each channel.

    Args:
        df: DataFrame with 'Channel' and 'Discrimination' columns.

    Returns:
        A Plotly figure with one bar group per channel and one bar each for
        'Discriminative' and 'Non-Discriminative' counts.
    """
    labels = ['Discriminative', 'Non-Discriminative']
    channel_discrimination = (
        df.groupby(['Channel', 'Discrimination'])
        .size()
        .unstack(fill_value=0)
        # Guarantee both plotted columns exist even when one label has been
        # filtered out of the data; otherwise the y= lookup below fails.
        .reindex(columns=labels, fill_value=0)
    )
    fig = px.bar(channel_discrimination, x=channel_discrimination.index, y=labels, barmode='group')
    fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=40, b=20))
    return fig
# Dashboard Layout
def render_dashboard(data=None):
    """Render the charts for the page currently selected in the sidebar.

    Args:
        data: DataFrame to visualize, typically the sidebar-filtered frame.
            When omitted, the module-level ``df`` is used, so existing
            zero-argument calls keep working.

    Reads the module-level ``page`` selection to decide what to draw. Only the
    "Overview" page is implemented so far.
    """
    if data is None:
        data = df

    # st.beta_columns was renamed to st.columns; prefer the current name and
    # fall back to the beta one on older Streamlit installs.
    make_columns = getattr(st, "columns", None) or st.beta_columns

    # Overview page layout
    if page == "Overview":
        st.header("Overview of Domains and Sentiments")
        left, right = make_columns(2)
        with left:
            st.plotly_chart(create_domain_distribution_chart(data))
        with right:
            st.plotly_chart(create_sentiment_distribution_chart(data))
        # ... [Additional overview charts]
    # ... [Other pages]
# Sidebar filters: collect the distinct values of each categorical column...
domain_options = df['Domain'].unique()
channel_options = df['Channel'].unique()
sentiment_options = df['Sentiment'].unique()
discrimination_options = df['Discrimination'].unique()

# ...and offer one multiselect per column, with every value pre-selected.
domain_filter = st.sidebar.multiselect(
    'Select Domain', options=domain_options, default=domain_options
)
channel_filter = st.sidebar.multiselect(
    'Select Channel', options=channel_options, default=channel_options
)
sentiment_filter = st.sidebar.multiselect(
    'Select Sentiment', options=sentiment_options, default=sentiment_options
)
discrimination_filter = st.sidebar.multiselect(
    'Select Discrimination', options=discrimination_options, default=discrimination_options
)

# Keep only the rows whose value is selected in all four filters.
_row_mask = df['Domain'].isin(domain_filter)
_row_mask = _row_mask & df['Channel'].isin(channel_filter)
_row_mask = _row_mask & df['Sentiment'].isin(sentiment_filter)
_row_mask = _row_mask & df['Discrimination'].isin(discrimination_filter)
df_filtered = df[_row_mask]

# Hand the filtered rows to the dashboard renderer.
render_dashboard(df_filtered)
|