Update app.py
Browse files
app.py
CHANGED
|
@@ -36,7 +36,7 @@ df = load_and_clean_data()
|
|
| 36 |
|
| 37 |
|
| 38 |
# Page navigation setup
|
| 39 |
-
page_names = ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"]
|
| 40 |
page = st.sidebar.selectbox("Choose a page", page_names)
|
| 41 |
|
| 42 |
# Sidebar Filters
|
|
@@ -63,7 +63,7 @@ color_palette = px.colors.sequential.Viridis
|
|
| 63 |
# Visualisation for Domain Distribution
|
| 64 |
def create_pie_chart(df, column, title):
|
| 65 |
fig = px.pie(df, names=column, title=title, hole=0.35)
|
| 66 |
-
fig.update_layout(margin=dict(l=
|
| 67 |
fig.update_traces(marker=dict(colors=color_palette))
|
| 68 |
return fig
|
| 69 |
|
|
@@ -71,7 +71,7 @@ def create_pie_chart(df, column, title):
|
|
| 71 |
def create_gender_ethnicity_distribution_chart(df):
|
| 72 |
df['GenderOrEthnicity'] = df['Domain'].apply(lambda x: "Gender: Women & LGBTQIA+" if x in ["Women", "LGBTQIA+"] else "Ethnicity")
|
| 73 |
fig = px.pie(df, names='GenderOrEthnicity', title='Distribution of Gender versus Ethnicity', hole=0.35)
|
| 74 |
-
fig.update_layout(margin=dict(l=
|
| 75 |
return fig
|
| 76 |
|
| 77 |
# Visualization for Sentiment Distribution Across Domains
|
|
@@ -79,7 +79,7 @@ def create_sentiment_distribution_chart(df):
|
|
| 79 |
df['Discrimination'] = df['Discrimination'].replace({"Non Discriminative": "Non-Discriminative"}) # Assuming typo in the original script
|
| 80 |
domain_counts = df.groupby(['Domain', 'Sentiment']).size().reset_index(name='counts')
|
| 81 |
fig = px.bar(domain_counts, x='Domain', y='counts', color='Sentiment', title="Sentiment Distribution Across Domains", barmode='stack')
|
| 82 |
-
fig.update_layout(margin=dict(l=
|
| 83 |
return fig
|
| 84 |
|
| 85 |
# Visualization for Correlation between Sentiment and Discrimination
|
|
@@ -96,7 +96,7 @@ def create_sentiment_discrimination_grouped_chart(df):
|
|
| 96 |
# Proceeding to plot only if we have data to plot
|
| 97 |
if not melted_df.empty:
|
| 98 |
fig = px.bar(melted_df, x='Sentiment', y='Count', color='Discrimination', barmode='group', title="Sentiment vs. Discrimination")
|
| 99 |
-
fig.update_layout(margin=dict(l=
|
| 100 |
return fig
|
| 101 |
else:
|
| 102 |
return "No data to display for the selected filters."
|
|
@@ -111,7 +111,7 @@ def create_top_negative_sentiment_domains_chart(df):
|
|
| 111 |
colors = ['limegreen', 'crimson', 'darkcyan']
|
| 112 |
fig = px.bar(domain_counts_subset, x='Count', y='Domain', title='Top Domains with Negative Sentiment', color='Domain',
|
| 113 |
orientation='h', color_discrete_sequence=colors)
|
| 114 |
-
fig.update_layout(margin=dict(l=
|
| 115 |
return fig
|
| 116 |
|
| 117 |
# Function for Key Phrases in Negative Sentiment Content Chart
|
|
@@ -122,15 +122,26 @@ def create_key_phrases_negative_sentiment_chart(df):
|
|
| 122 |
ngram_freq = pd.DataFrame(sorted([(count_values[i], k) for k, i in cv.vocabulary_.items()], reverse=True))
|
| 123 |
ngram_freq.columns = ['frequency', 'ngram']
|
| 124 |
fig = px.bar(ngram_freq.head(10), x='frequency', y='ngram', orientation='h', title='Key phrases in Negative Sentiment Content')
|
| 125 |
-
fig.update_layout(margin=dict(l=
|
| 126 |
return fig
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
# Function for Prevalence of Discriminatory Content Chart
|
| 129 |
def create_prevalence_discriminatory_content_chart(df):
|
| 130 |
domain_counts = df.groupby(['Domain', 'Discrimination']).size().unstack(fill_value=0)
|
| 131 |
fig = px.bar(domain_counts, x=domain_counts.index, y=['Discriminative', 'Non-Discriminative'], barmode='group',
|
| 132 |
title='Prevalence of Discriminatory Content')
|
| 133 |
-
fig.update_layout(margin=dict(l=
|
| 134 |
return fig
|
| 135 |
|
| 136 |
# Function for Top Domains with Discriminatory Content Chart
|
|
@@ -141,27 +152,27 @@ def create_top_discriminatory_domains_chart(df):
|
|
| 141 |
domain_counts_subset = domain_counts_subset.rename(columns={'Discriminative': 'Count'})
|
| 142 |
fig = px.bar(domain_counts_subset, x='Count', y=domain_counts_subset.index, orientation='h',
|
| 143 |
title='Top Domains with Discriminatory Content')
|
| 144 |
-
fig.update_layout(margin=dict(l=
|
| 145 |
return fig
|
| 146 |
|
| 147 |
# Function for Channel-wise Sentiment Distribution Chart
|
| 148 |
def create_sentiment_distribution_by_channel_chart(df):
|
| 149 |
sentiment_by_channel = df.groupby(['Channel', 'Sentiment']).size().reset_index(name='counts')
|
| 150 |
fig = px.bar(sentiment_by_channel, x='Channel', y='counts', color='Sentiment', title="Sentiment Distribution by Channel", barmode='group')
|
| 151 |
-
fig.update_layout(margin=dict(l=
|
| 152 |
return fig
|
| 153 |
|
| 154 |
# Function for Channel-wise Distribution of Discriminative Content Chart
|
| 155 |
def create_channel_discrimination_chart(df):
|
| 156 |
channel_discrimination = df.groupby(['Channel', 'Discrimination']).size().unstack(fill_value=0)
|
| 157 |
fig = px.bar(channel_discrimination, x=channel_discrimination.index, y=['Discriminative', 'Non-Discriminative'], barmode='group')
|
| 158 |
-
fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=
|
| 159 |
return fig
|
| 160 |
|
| 161 |
# Function for rendering dashboard
|
| 162 |
def render_dashboard(page, df_filtered):
|
| 163 |
-
if page == "Overview":
|
| 164 |
-
st.title("Overview Dashboard")
|
| 165 |
col1, col2 = st.columns(2)
|
| 166 |
with col1:
|
| 167 |
st.plotly_chart(create_pie_chart(df_filtered, 'Domain', 'Distribution of Domains'))
|
|
@@ -189,6 +200,8 @@ def render_dashboard(page, df_filtered):
|
|
| 189 |
col3, col4 = st.columns(2)
|
| 190 |
with col3:
|
| 191 |
st.plotly_chart(create_key_phrases_negative_sentiment_chart(df_filtered))
|
|
|
|
|
|
|
| 192 |
|
| 193 |
elif page == "Discrimination Analysis":
|
| 194 |
st.title("Discrimination Analysis Dashboard")
|
|
|
|
| 36 |
|
| 37 |
|
| 38 |
# Page navigation setup
|
| 39 |
+
page_names = [" GESI Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"]
|
| 40 |
page = st.sidebar.selectbox("Choose a page", page_names)
|
| 41 |
|
| 42 |
# Sidebar Filters
|
|
|
|
| 63 |
# Visualisation for Domain Distribution
|
| 64 |
def create_pie_chart(df, column, title):
    """Build a donut-style pie chart of the value distribution in one column.

    Args:
        df: Data handed straight to ``px.pie``.
        column: Name of the column whose values become the pie slices.
        title: Chart title.

    Returns:
        The Plotly figure created by ``px.pie`` with the compact layout applied.
    """
    fig = px.pie(df, names=column, title=title, hole=0.35)
    # Compact layout: small margins, legend placed at x=0.1 / y=1, size-7 font.
    # (The original call was missing its closing parenthesis.)
    fig.update_layout(
        margin=dict(l=6, r=6, t=12, b=6),
        legend=dict(x=0.1, y=1),
        font=dict(size=7),
    )
    # Slice colours come from the module-level ``color_palette``.
    fig.update_traces(marker=dict(colors=color_palette))
    return fig
|
| 69 |
|
|
|
|
| 71 |
def create_gender_ethnicity_distribution_chart(df):
    """Build a donut chart splitting rows into gender vs. ethnicity domains.

    Rows whose ``Domain`` is "Women" or "LGBTQIA+" are labelled
    "Gender: Women & LGBTQIA+"; every other row is labelled "Ethnicity".

    Side effect: the label is stored in a ``GenderOrEthnicity`` column on the
    ``df`` object that was passed in (added or overwritten in place).

    Args:
        df: Data with a ``Domain`` column.

    Returns:
        The Plotly figure created by ``px.pie`` with the compact layout applied.
    """
    df['GenderOrEthnicity'] = df['Domain'].apply(
        lambda x: "Gender: Women & LGBTQIA+" if x in ["Women", "LGBTQIA+"] else "Ethnicity"
    )
    fig = px.pie(
        df,
        names='GenderOrEthnicity',
        title='Distribution of Gender versus Ethnicity',
        hole=0.35,
    )
    # Compact layout: small margins, legend placed at x=0.1 / y=1, size-7 font.
    # (The original call was missing its closing parenthesis.)
    fig.update_layout(
        margin=dict(l=6, r=6, t=12, b=6),
        legend=dict(x=0.1, y=1),
        font=dict(size=7),
    )
    return fig
|
| 76 |
|
| 77 |
# Visualization for Sentiment Distribution Across Domains
|
|
|
|
| 79 |
df['Discrimination'] = df['Discrimination'].replace({"Non Discriminative": "Non-Discriminative"}) # Assuming typo in the original script
|
| 80 |
domain_counts = df.groupby(['Domain', 'Sentiment']).size().reset_index(name='counts')
|
| 81 |
fig = px.bar(domain_counts, x='Domain', y='counts', color='Sentiment', title="Sentiment Distribution Across Domains", barmode='stack')
|
| 82 |
+
fig.update_layout(margin=dict(l=6, r=6, t=12, b=6), xaxis_title="Domain", yaxis_title="Counts", font=dict(size=7))
|
| 83 |
return fig
|
| 84 |
|
| 85 |
# Visualization for Correlation between Sentiment and Discrimination
|
|
|
|
| 96 |
# Proceeding to plot only if we have data to plot
|
| 97 |
if not melted_df.empty:
|
| 98 |
fig = px.bar(melted_df, x='Sentiment', y='Count', color='Discrimination', barmode='group', title="Sentiment vs. Discrimination")
|
| 99 |
+
fig.update_layout(margin=dict(l=6, r=6, t=12, b=6), xaxis_title="Sentiment", yaxis_title="Count", font=dict(size=7))
|
| 100 |
return fig
|
| 101 |
else:
|
| 102 |
return "No data to display for the selected filters."
|
|
|
|
| 111 |
colors = ['limegreen', 'crimson', 'darkcyan']
|
| 112 |
fig = px.bar(domain_counts_subset, x='Count', y='Domain', title='Top Domains with Negative Sentiment', color='Domain',
|
| 113 |
orientation='h', color_discrete_sequence=colors)
|
| 114 |
+
fig.update_layout(margin=dict(l=6, r=6, t=12, b=6), xaxis_title="Negative sentiment content Count", yaxis_title="Domain", font=dict(size=7))
|
| 115 |
return fig
|
| 116 |
|
| 117 |
# Function for Key Phrases in Negative Sentiment Content Chart
|
|
|
|
| 122 |
ngram_freq = pd.DataFrame(sorted([(count_values[i], k) for k, i in cv.vocabulary_.items()], reverse=True))
|
| 123 |
ngram_freq.columns = ['frequency', 'ngram']
|
| 124 |
fig = px.bar(ngram_freq.head(10), x='frequency', y='ngram', orientation='h', title='Key phrases in Negative Sentiment Content')
|
| 125 |
+
fig.update_layout(margin=dict(l=6, r=6, t=12, b=6), xaxis_title="Frequency", yaxis_title="Trigram", font=dict(size=7))
|
| 126 |
return fig
|
| 127 |
|
| 128 |
+
# Function for Key Phrases in Positive Sentiment Content Chart
|
| 129 |
+
def create_key_phrases_positive_sentiment_chart(df):
    """Plot the ten most frequent trigrams in positive-sentiment content.

    The ``Content`` of rows whose ``Sentiment`` is "Positive" is tokenised into
    word trigrams (English stop words removed), the trigram counts are summed
    over all rows, and the ten highest counts are drawn as horizontal bars.

    When no positive rows are selected, or no trigram survives stop-word
    removal, an empty chart is returned instead of raising.

    Args:
        df: Data with ``Content`` and ``Sentiment`` columns.

    Returns:
        The Plotly figure created by ``px.bar`` with the compact layout applied.
    """
    positive_content = df.loc[df['Sentiment'] == 'Positive', 'Content']

    ranked = []
    if not positive_content.empty:
        cv = CountVectorizer(ngram_range=(3, 3), stop_words='english')
        try:
            trigrams = cv.fit_transform(positive_content)
        except ValueError as err:
            # scikit-learn reports "nothing left to count" as a ValueError
            # mentioning an empty vocabulary; any other ValueError is a real
            # data problem and must stay visible.
            if "empty vocabulary" not in str(err):
                raise
        else:
            count_values = trigrams.toarray().sum(axis=0)
            # Sort by (count, trigram) descending, as tuples compare.
            ranked = sorted(
                ((count_values[i], k) for k, i in cv.vocabulary_.items()),
                reverse=True,
            )

    # Passing the column names up front keeps this valid for an empty result.
    ngram_freq = pd.DataFrame(ranked, columns=['frequency', 'ngram'])
    fig = px.bar(
        ngram_freq.head(10),
        x='frequency',
        y='ngram',
        orientation='h',
        title='Key phrases in Positive Sentiment Content',
    )
    fig.update_layout(
        margin=dict(l=6, r=6, t=12, b=6),
        xaxis_title="Frequency",
        yaxis_title="Trigram",
        font=dict(size=7),
    )
    return fig
|
| 138 |
+
|
| 139 |
# Function for Prevalence of Discriminatory Content Chart
|
| 140 |
def create_prevalence_discriminatory_content_chart(df):
    """Grouped bar chart of discriminative vs. non-discriminative counts per domain.

    Rows of ``df`` are counted per (Domain, Discrimination) pair and pivoted so
    that each domain is one row with one column per discrimination label;
    combinations that never occur are filled with 0. Only the
    'Discriminative' and 'Non-Discriminative' columns are plotted.

    Returns:
        The Plotly figure with compact margins, axis titles and a size-7 font.
    """
    pair_sizes = df.groupby(['Domain', 'Discrimination']).size()
    per_domain = pair_sizes.unstack(fill_value=0)
    plotted_labels = ['Discriminative', 'Non-Discriminative']

    chart = px.bar(
        per_domain,
        x=per_domain.index,
        y=plotted_labels,
        barmode='group',
        title='Prevalence of Discriminatory Content',
    )
    chart.update_layout(
        margin={'l': 6, 'r': 6, 't': 12, 'b': 6},
        xaxis_title="Domain",
        yaxis_title="Count",
        font={'size': 7},
    )
    return chart
|
| 146 |
|
| 147 |
# Function for Top Domains with Discriminatory Content Chart
|
|
|
|
| 152 |
domain_counts_subset = domain_counts_subset.rename(columns={'Discriminative': 'Count'})
|
| 153 |
fig = px.bar(domain_counts_subset, x='Count', y=domain_counts_subset.index, orientation='h',
|
| 154 |
title='Top Domains with Discriminatory Content')
|
| 155 |
+
# Compact margins, explicit axis titles and a size-7 font for the horizontal bar chart.
fig.update_layout(margin=dict(l=6, r=6, t=12, b=6), xaxis_title="Discriminatory Content Count", yaxis_title="Domain", font=dict(size=7))
|
| 156 |
return fig
|
| 157 |
|
| 158 |
# Function for Channel-wise Sentiment Distribution Chart
|
| 159 |
def create_sentiment_distribution_by_channel_chart(df):
    """Grouped bar chart of how many rows each channel has per sentiment.

    Rows of ``df`` are counted per (Channel, Sentiment) pair; the counts are
    plotted with one bar group per channel and one colour per sentiment.

    Returns:
        The Plotly figure with compact margins, axis titles and a size-7 font.
    """
    pair_sizes = df.groupby(['Channel', 'Sentiment']).size()
    tidy_counts = pair_sizes.reset_index(name='counts')

    chart = px.bar(
        tidy_counts,
        x='Channel',
        y='counts',
        color='Sentiment',
        title="Sentiment Distribution by Channel",
        barmode='group',
    )
    chart.update_layout(
        margin={'l': 6, 'r': 6, 't': 12, 'b': 6},
        xaxis_title="Channel",
        yaxis_title="Counts",
        font={'size': 7},
    )
    return chart
|
| 164 |
|
| 165 |
# Function for Channel-wise Distribution of Discriminative Content Chart
|
| 166 |
def create_channel_discrimination_chart(df):
    """Grouped bar chart of discriminative vs. non-discriminative counts per channel.

    Rows of ``df`` are counted per (Channel, Discrimination) pair and pivoted so
    that each channel is one row with one column per discrimination label;
    combinations that never occur are filled with 0. Only the
    'Discriminative' and 'Non-Discriminative' columns are plotted.

    Returns:
        The Plotly figure; its title, compact margins and size-7 font are all
        applied through ``update_layout``.
    """
    pair_sizes = df.groupby(['Channel', 'Discrimination']).size()
    per_channel = pair_sizes.unstack(fill_value=0)
    plotted_labels = ['Discriminative', 'Non-Discriminative']

    chart = px.bar(
        per_channel,
        x=per_channel.index,
        y=plotted_labels,
        barmode='group',
    )
    chart.update_layout(
        title='Channel-wise Distribution of Discriminative Content',
        margin={'l': 6, 'r': 6, 't': 12, 'b': 6},
        font={'size': 7},
    )
    return chart
|
| 171 |
|
| 172 |
# Function for rendering dashboard
|
| 173 |
def render_dashboard(page, df_filtered):
|
| 174 |
+
if page == " GESI Overview":
|
| 175 |
+
st.title(" GESI Overview Dashboard")
|
| 176 |
col1, col2 = st.columns(2)
|
| 177 |
with col1:
|
| 178 |
st.plotly_chart(create_pie_chart(df_filtered, 'Domain', 'Distribution of Domains'))
|
|
|
|
| 200 |
col3, col4 = st.columns(2)
|
| 201 |
with col3:
|
| 202 |
st.plotly_chart(create_key_phrases_negative_sentiment_chart(df_filtered))
|
| 203 |
+
with col4:
|
| 204 |
+
# Positive-sentiment key phrases, rendered in col4 beside the negative chart in col3.
st.plotly_chart(create_key_phrases_positive_sentiment_chart(df_filtered))
|
| 205 |
|
| 206 |
elif page == "Discrimination Analysis":
|
| 207 |
st.title("Discrimination Analysis Dashboard")
|