Update app.py
Browse files
app.py
CHANGED
|
@@ -10,20 +10,16 @@ import plotly.graph_objects as go
|
|
| 10 |
# Set page configuration
|
| 11 |
st.set_page_config(layout="wide")
|
| 12 |
|
|
|
|
| 13 |
def load_and_clean_data():
|
| 14 |
-
# Load data
|
| 15 |
df1 = pd.read_csv("data/reviewed_social_media_english.csv")
|
| 16 |
df2 = pd.read_csv("data/reviewed_news_english.csv")
|
| 17 |
df3 = pd.read_csv("data/tamil_social_media.csv")
|
| 18 |
df4 = pd.read_csv("data/tamil_news.csv")
|
| 19 |
|
| 20 |
-
# Concatenate dataframes
|
| 21 |
df_combined = pd.concat([df1, df2, df3, df4])
|
| 22 |
-
|
| 23 |
-
# Normalize Text
|
| 24 |
df_combined['Domain'] = df_combined['Domain'].replace("MUSLIM", "Muslim")
|
| 25 |
-
|
| 26 |
-
# Drop irrelevant data
|
| 27 |
df_combined = df_combined[df_combined['Domain'] != 'Not relevant']
|
| 28 |
df_combined = df_combined[df_combined['Domain'] != 'None']
|
| 29 |
df_combined = df_combined[df_combined['Discrimination'] != 'None']
|
|
@@ -31,12 +27,8 @@ def load_and_clean_data():
|
|
| 31 |
|
| 32 |
return df_combined
|
| 33 |
|
| 34 |
-
# Load and clean data
|
| 35 |
df = load_and_clean_data()
|
| 36 |
|
| 37 |
-
# Page navigation
|
| 38 |
-
page = st.sidebar.selectbox("Choose a page", ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"])
|
| 39 |
-
|
| 40 |
# Define Sidebar Filters
|
| 41 |
domain_options = df['Domain'].unique()
|
| 42 |
channel_options = df['Channel'].unique()
|
|
@@ -48,53 +40,47 @@ channel_filter = st.sidebar.multiselect('Select Channel', options=channel_option
|
|
| 48 |
sentiment_filter = st.sidebar.multiselect('Select Sentiment', options=sentiment_options, default=sentiment_options)
|
| 49 |
discrimination_filter = st.sidebar.multiselect('Select Discrimination', options=discrimination_options, default=discrimination_options)
|
| 50 |
|
| 51 |
-
# Apply
|
| 52 |
df_filtered = df[(df['Domain'].isin(domain_filter)) &
|
| 53 |
(df['Channel'].isin(channel_filter)) &
|
| 54 |
(df['Sentiment'].isin(sentiment_filter)) &
|
| 55 |
(df['Discrimination'].isin(discrimination_filter))]
|
| 56 |
|
| 57 |
-
|
| 58 |
# Define a color palette for consistent visualization styles
|
| 59 |
color_palette = px.colors.sequential.Viridis
|
| 60 |
|
| 61 |
-
# Visualization function
|
| 62 |
-
def create_visualizations(df):
|
| 63 |
-
# [Existing visualization code]
|
| 64 |
-
pass
|
| 65 |
-
|
| 66 |
# Page navigation
|
| 67 |
page = st.sidebar.selectbox("Choose a page", ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"])
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
elif page == "Discrimination Analysis":
|
| 74 |
-
create_visualizations(df) # Placeholder for discrimination analysis visualizations
|
| 75 |
-
elif page == "Channel Analysis":
|
| 76 |
-
create_visualizations(df) # Placeholder for channel analysis visualizations
|
| 77 |
-
|
| 78 |
-
# [Place the rest of the code for the visualizations here]
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
# Define a color palette for consistent visualization styles
|
| 82 |
-
color_palette = px.colors.sequential.Viridis
|
| 83 |
-
|
| 84 |
-
# Function for Domain Distribution Chart
|
| 85 |
-
def create_domain_distribution_chart(df):
|
| 86 |
-
fig = px.pie(df, names='Domain', title='Distribution of Domains', hole=0.35)
|
| 87 |
-
fig.update_layout(title_x=0.5, margin=dict(l=20, r=20, t=30, b=20), legend=dict(x=0.1, y=1))
|
| 88 |
fig.update_traces(marker=dict(colors=color_palette))
|
| 89 |
return fig
|
| 90 |
|
| 91 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
def create_sentiment_distribution_chart(df):
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
| 95 |
return fig
|
| 96 |
|
| 97 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
# Function for Channel-wise Sentiment Over Time Chart
|
| 100 |
def create_channel_sentiment_over_time_chart(df):
|
|
@@ -111,20 +97,40 @@ def create_channel_discrimination_chart(df):
|
|
| 111 |
fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=40, b=20))
|
| 112 |
return fig
|
| 113 |
|
| 114 |
-
|
| 115 |
-
def render_dashboard():
|
| 116 |
-
# Overview page layout
|
| 117 |
if page == "Overview":
|
| 118 |
-
st.
|
|
|
|
| 119 |
col1, col2 = st.beta_columns(2)
|
| 120 |
with col1:
|
| 121 |
-
st.plotly_chart(
|
| 122 |
with col2:
|
| 123 |
-
st.plotly_chart(
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
# Render the dashboard with filtered data
|
| 130 |
-
render_dashboard(df_filtered)
|
|
|
|
| 10 |
# Set page configuration
|
| 11 |
st.set_page_config(layout="wide")
|
| 12 |
|
| 13 |
+
# Function to load and clean data
|
| 14 |
def load_and_clean_data():
|
|
|
|
| 15 |
df1 = pd.read_csv("data/reviewed_social_media_english.csv")
|
| 16 |
df2 = pd.read_csv("data/reviewed_news_english.csv")
|
| 17 |
df3 = pd.read_csv("data/tamil_social_media.csv")
|
| 18 |
df4 = pd.read_csv("data/tamil_news.csv")
|
| 19 |
|
| 20 |
+
# Concatenate dataframes and clean data
|
| 21 |
df_combined = pd.concat([df1, df2, df3, df4])
|
|
|
|
|
|
|
| 22 |
df_combined['Domain'] = df_combined['Domain'].replace("MUSLIM", "Muslim")
|
|
|
|
|
|
|
| 23 |
df_combined = df_combined[df_combined['Domain'] != 'Not relevant']
|
| 24 |
df_combined = df_combined[df_combined['Domain'] != 'None']
|
| 25 |
df_combined = df_combined[df_combined['Discrimination'] != 'None']
|
|
|
|
| 27 |
|
| 28 |
return df_combined
|
| 29 |
|
|
|
|
| 30 |
df = load_and_clean_data()
|
| 31 |
|
|
|
|
|
|
|
|
|
|
| 32 |
# Define Sidebar Filters
|
| 33 |
domain_options = df['Domain'].unique()
|
| 34 |
channel_options = df['Channel'].unique()
|
|
|
|
| 40 |
sentiment_filter = st.sidebar.multiselect('Select Sentiment', options=sentiment_options, default=sentiment_options)
|
| 41 |
discrimination_filter = st.sidebar.multiselect('Select Discrimination', options=discrimination_options, default=discrimination_options)
|
| 42 |
|
| 43 |
+
# Apply filters
|
| 44 |
# Keep only the rows that match every sidebar selection. The explicit copy
# makes df_filtered an independent frame, so chart helpers that add or
# rewrite columns on it do not trigger pandas' SettingWithCopyWarning.
df_filtered = df[
    df['Domain'].isin(domain_filter)
    & df['Channel'].isin(channel_filter)
    & df['Sentiment'].isin(sentiment_filter)
    & df['Discrimination'].isin(discrimination_filter)
].copy()
|
| 48 |
|
|
|
|
| 49 |
# Define a color palette for consistent visualization styles
|
| 50 |
color_palette = px.colors.sequential.Viridis
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# Page navigation
|
| 53 |
page = st.sidebar.selectbox("Choose a page", ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"])
|
| 54 |
|
| 55 |
+
# Visualisation for Domain Distribution
|
| 56 |
+
def create_pie_chart(df, column, title):
    """Build a donut-style pie chart of the values in one column.

    Args:
        df: DataFrame holding the data to plot.
        column: Name of the column whose values become the pie slices.
        title: Title shown above the chart.

    Returns:
        The Plotly figure, coloured with the module-level ``color_palette``.
    """
    layout_options = dict(
        margin=dict(l=20, r=20, t=30, b=20),
        legend=dict(x=0.1, y=1),
        font=dict(size=12),
    )
    chart = px.pie(df, names=column, title=title, hole=0.35)
    chart.update_layout(**layout_options)
    # Slice colours come from the shared palette so charts look consistent.
    chart.update_traces(marker=dict(colors=color_palette))
    return chart
|
| 61 |
|
| 62 |
+
# Visualization for Distribution of Gender versus Ethnicity
|
| 63 |
+
def create_gender_ethnicity_distribution_chart(df):
    """Build a donut chart comparing gender-related and ethnicity-related rows.

    Rows whose ``Domain`` is "Women" or "LGBTQIA+" are labelled
    "Gender: Women & LGBTQIA+"; every other row is labelled "Ethnicity".

    Args:
        df: DataFrame with a ``Domain`` column. It is not modified.

    Returns:
        The Plotly pie figure.
    """
    is_gender = df['Domain'].isin(["Women", "LGBTQIA+"])
    # assign() returns a new frame, so the helper column never leaks into the
    # caller's DataFrame (and no SettingWithCopyWarning is raised on a slice).
    labelled = df.assign(
        GenderOrEthnicity=is_gender.map(
            {True: "Gender: Women & LGBTQIA+", False: "Ethnicity"}
        )
    )
    fig = px.pie(
        labelled,
        names='GenderOrEthnicity',
        title='Distribution of Gender versus Ethnicity',
        hole=0.35,
    )
    fig.update_layout(
        margin=dict(l=20, r=20, t=30, b=20),
        legend=dict(x=0.1, y=1),
        font=dict(size=12),
    )
    return fig
|
| 68 |
+
|
| 69 |
+
# Visualization for Sentiment Distribution Across Domains
|
| 70 |
def create_sentiment_distribution_chart(df):
    """Build a stacked bar chart of sentiment counts per domain.

    Only the ``Domain`` and ``Sentiment`` columns are read; the input frame
    is left untouched.

    Args:
        df: DataFrame with ``Domain`` and ``Sentiment`` columns.

    Returns:
        The Plotly bar figure, one stacked bar per domain.
    """
    # One row per (Domain, Sentiment) pair with its number of occurrences.
    sentiment_counts = (
        df.groupby(['Domain', 'Sentiment'])
        .size()
        .reset_index(name='counts')
    )
    fig = px.bar(
        sentiment_counts,
        x='Domain',
        y='counts',
        color='Sentiment',
        title="Sentiment Distribution Across Domains",
        barmode='stack',
    )
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis_title="Domain",
        yaxis_title="Counts",
        font=dict(size=12),
    )
    return fig
|
| 76 |
|
| 77 |
+
# Visualization for Correlation between Sentiment and Discrimination
|
| 78 |
+
def create_sentiment_discrimination_grouped_chart(df):
    """Build a grouped bar chart of sentiment against discrimination.

    Counts rows per (Sentiment, Discrimination) pair for the 'Yes' and 'No'
    discrimination values and plots them side by side for each sentiment.

    Args:
        df: DataFrame with ``Sentiment`` and ``Discrimination`` columns.

    Returns:
        The Plotly bar figure.
    """
    discrimination_levels = ['Yes', 'No']
    # Force both columns to exist: when the sidebar filter removes one of the
    # values, the crosstab lacks that column and pd.melt would raise KeyError.
    crosstab_df = (
        pd.crosstab(df['Sentiment'], df['Discrimination'])
        .reindex(columns=discrimination_levels, fill_value=0)
        .reset_index()
    )
    melted_df = pd.melt(
        crosstab_df,
        id_vars='Sentiment',
        value_vars=discrimination_levels,
        var_name='Discrimination',
        value_name='Count',
    )
    fig = px.bar(
        melted_df,
        x='Sentiment',
        y='Count',
        color='Discrimination',
        barmode='group',
        title="Sentiment vs. Discrimination",
    )
    fig.update_layout(
        margin=dict(l=20, r=20, t=40, b=20),
        xaxis_title="Sentiment",
        yaxis_title="Count",
        font=dict(size=12),
    )
    return fig
|
| 84 |
|
| 85 |
# Function for Channel-wise Sentiment Over Time Chart
|
| 86 |
def create_channel_sentiment_over_time_chart(df):
|
|
|
|
| 97 |
fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=40, b=20))
|
| 98 |
return fig
|
| 99 |
|
| 100 |
+
def render_dashboard(page, df_filtered):
    """Render the dashboard page selected in the sidebar.

    Args:
        page: Name of the page to draw: "Overview", "Sentiment Analysis",
            "Discrimination Analysis" or "Channel Analysis". Any other value
            renders nothing.
        df_filtered: DataFrame passed to every chart builder on the page.
    """
    if page == "Overview":
        st.title("Overview Dashboard")
        # Create 2x2 grid for overview visualizations.
        # st.columns is used everywhere (the Channel Analysis page already
        # relies on it) instead of the older st.beta_columns alias.
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(create_pie_chart(df_filtered, 'Domain', 'Distribution of Domains'))
        with col2:
            st.plotly_chart(create_gender_ethnicity_distribution_chart(df_filtered))

        col3, col4 = st.columns(2)
        with col3:
            st.plotly_chart(create_sentiment_distribution_chart(df_filtered))
        with col4:
            st.plotly_chart(create_sentiment_discrimination_grouped_chart(df_filtered))

    elif page == "Sentiment Analysis":
        st.title("Sentiment Analysis Dashboard")
        # Implementation for the "Sentiment Analysis" page...
        # Example: st.plotly_chart(create_some_other_chart(df_filtered))

    elif page == "Discrimination Analysis":
        st.title("Discrimination Analysis Dashboard")
        # Implementation for the "Discrimination Analysis" page...
        # Example: st.plotly_chart(create_another_chart(df_filtered))

    elif page == "Channel Analysis":
        st.title("Channel Analysis Dashboard")
        # Create visualizations for the channel analysis page
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(create_channel_sentiment_over_time_chart(df_filtered))
        with col2:
            st.plotly_chart(create_channel_discrimination_chart(df_filtered))
|
| 134 |
|
| 135 |
# Render the dashboard with filtered data
|
| 136 |
+
render_dashboard(page, df_filtered)
|