Update app.py
Browse files
app.py
CHANGED
|
@@ -1,48 +1,113 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
from sklearn.feature_extraction.text import CountVectorizer
|
| 5 |
+
import seaborn as sns
|
| 6 |
+
import plotly.express as px
|
| 7 |
+
import plotly.io as pio
|
| 8 |
+
import plotly.graph_objects as go
|
| 9 |
+
|
| 10 |
+
# Set page configuration
st.set_page_config(layout="wide")

# Read data into dataframes
# NOTE(review): the two Tamil paths carry no ".csv" extension, unlike the
# English ones - confirm the files on disk are really named this way.
df1 = pd.read_csv("data/reviewed_social_media_english.csv")
df2 = pd.read_csv("data/reviewed_news_english.csv")
df3 = pd.read_csv("data/tamil_social_media")
df4 = pd.read_csv("data/tamil_news")

# Normalize Text
# Assign the result back to the column instead of calling
# ``replace(..., inplace=True)`` on ``frame['Domain']``: the in-place call
# operates on a temporary column selection, which pandas warns about and,
# with Copy-on-Write enabled, no longer propagates to the parent frame.
for _frame in (df1, df2, df3, df4):
    _frame['Domain'] = _frame['Domain'].replace("MUSLIM", "Muslim")
|
| 24 |
+
|
| 25 |
+
# Drop irrelevant data
def _drop_irrelevant(frame):
    """Return the rows of *frame* that carry usable labels.

    A row is removed when its 'Domain' is 'Not relevant' or 'None', or when
    its 'Discrimination' or 'Sentiment' is 'None'.
    """
    # NOTE(review): depending on the pandas version, read_csv may already
    # parse the literal text "None" as NaN, in which case these string
    # comparisons never match - confirm whether a dropna() is also needed.
    keep = (
        (frame['Domain'] != 'Not relevant')
        & (frame['Domain'] != 'None')
        & (frame['Discrimination'] != 'None')
        & (frame['Sentiment'] != 'None')
    )
    return frame[keep]


# Build a new list of filtered frames. Re-assigning the loop variable inside
# ``for df in frames`` only rebinds that name and leaves ``frames`` holding
# the unfiltered data, so the concatenation below would ignore the filter.
frames = [_drop_irrelevant(frame) for frame in [df1, df2, df3, df4]]

# Concatenate/merge dataframes
df = pd.concat(frames)
|
| 35 |
+
|
| 36 |
+
# Visualization function
def create_visualizations(df):
    """Placeholder hook for the page visualizations; currently does nothing.

    Args:
        df: Data to visualize (unused until the body is filled in).

    Returns:
        None.
    """
    # [Existing visualization code]
    return None
|
| 40 |
+
|
| 41 |
+
# Page navigation
_PAGE_NAMES = ["Overview", "Sentiment Analysis", "Discrimination Analysis", "Channel Analysis"]
page = st.sidebar.selectbox("Choose a page", _PAGE_NAMES)

# Every page currently renders the same placeholder visualizations, so one
# membership test stands in for the per-page branches.
if page in _PAGE_NAMES:
    create_visualizations(df)
|
| 52 |
+
|
| 53 |
+
# [Place the rest of the code for the visualizations here]
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# Define a color palette for consistent visualization styles
color_palette = px.colors.sequential.Viridis


# Function for Domain Distribution Chart
def create_domain_distribution_chart(df):
    """Build a donut chart of how rows are distributed over 'Domain'.

    Args:
        df: DataFrame with a 'Domain' column.

    Returns:
        The Plotly figure, with its slices colored from ``color_palette``.
    """
    donut = px.pie(df, names='Domain', title='Distribution of Domains', hole=0.35)
    donut.update_layout(
        title_x=0.5,
        margin={'l': 20, 'r': 20, 't': 30, 'b': 20},
        legend={'x': 0.1, 'y': 1},
    )
    donut.update_traces(marker={'colors': color_palette})
    return donut
|
| 65 |
+
|
| 66 |
+
# Function for Sentiment Distribution Across Domains Chart
def create_sentiment_distribution_chart(df):
    """Build a grouped bar chart of row counts per 'Domain', split by 'Sentiment'.

    The previous body only contained a placeholder comment and then used an
    undefined ``fig``, so every call raised NameError.

    Args:
        df: DataFrame with 'Domain' and 'Sentiment' columns.

    Returns:
        The Plotly figure.
    """
    # NOTE(review): the original chart code was not present in this file;
    # a per-domain count histogram is used here - confirm it matches the
    # intended design.
    fig = px.histogram(
        df,
        x='Domain',
        color='Sentiment',
        barmode='group',
        title='Sentiment Distribution Across Domains',
    )
    fig.update_layout(margin=dict(l=20, r=20, t=40, b=20))
    return fig
|
| 71 |
+
|
| 72 |
+
# ... [Define other chart functions following the same pattern]
|
| 73 |
+
|
| 74 |
+
# Function for Channel-wise Sentiment Over Time Chart
def create_channel_sentiment_over_time_chart(df):
    """Build a line chart of monthly row counts per channel and sentiment.

    Each channel gets its own color and each sentiment its own dash style.

    Args:
        df: DataFrame with 'Date', 'Channel' and 'Sentiment' columns. It is
            not modified.

    Returns:
        The Plotly figure.
    """
    # Derive the month bucket as a separate Series so the caller's frame is
    # left untouched (it may be a filtered slice of the full data).
    month = pd.to_datetime(df['Date']).dt.to_period('M').dt.to_timestamp().rename('Date')

    # Long format (one row per month/channel/sentiment) lets Plotly Express
    # address every field by column name. The previous wide/MultiIndex layout
    # took its x values from the Channel index level, referenced 'Channel' as
    # a column although it was part of the index, and failed whenever one of
    # the hard-coded sentiment columns was absent.
    counts = (
        df.groupby([month, 'Channel', 'Sentiment'])
        .size()
        .reset_index(name='Count')
    )
    fig = px.line(counts, x='Date', y='Count', color='Channel', line_dash='Sentiment')
    fig.update_layout(title='Channel-wise Sentiment Over Time', margin=dict(l=20, r=20, t=40, b=20))
    return fig
|
| 81 |
+
|
| 82 |
+
# Function for Channel-wise Distribution of Discriminative Content Chart
def create_channel_discrimination_chart(df):
    """Build a grouped bar chart of discrimination labels per channel.

    Args:
        df: DataFrame with 'Channel' and 'Discrimination' columns.

    Returns:
        The Plotly figure, with one bar group per channel.
    """
    labels = ['Discriminative', 'Non-Discriminative']
    channel_discrimination = (
        df.groupby(['Channel', 'Discrimination'])
        .size()
        .unstack(fill_value=0)
        # Guarantee both plotted columns exist: when one label is absent from
        # the data (e.g. deselected in a filter) the unstacked frame lacks
        # that column and px.bar would reject the ``y`` list.
        .reindex(columns=labels, fill_value=0)
    )
    fig = px.bar(channel_discrimination, x=channel_discrimination.index, y=labels, barmode='group')
    fig.update_layout(title='Channel-wise Distribution of Discriminative Content', margin=dict(l=20, r=20, t=40, b=20))
    return fig
|
| 88 |
+
|
| 89 |
+
# Dashboard Layout
def render_dashboard(data=None):
    """Render the charts for the currently selected page.

    Args:
        data: DataFrame to plot. Defaults to the module-level ``df`` so that
            existing zero-argument calls keep working; the script itself
            passes the sidebar-filtered frame.
    """
    if data is None:
        data = df

    # Overview page layout
    if page == "Overview":
        st.header("Overview of Domains and Sentiments")
        # ``st.beta_columns`` was renamed to ``st.columns`` and later removed;
        # prefer the current name and fall back only on old Streamlit builds.
        make_columns = getattr(st, "columns", None) or st.beta_columns
        col1, col2 = make_columns(2)
        with col1:
            st.plotly_chart(create_domain_distribution_chart(data))
        with col2:
            st.plotly_chart(create_sentiment_distribution_chart(data))
        # ... [Additional overview charts]
|
| 100 |
+
|
| 101 |
+
# ... [Other pages]
|
| 102 |
+
|
| 103 |
+
# Sidebar Filters
def _sidebar_filter(label, column):
    """Show a sidebar multiselect over the unique values of ``df[column]``.

    All values are preselected; the chosen values are returned.
    """
    choices = df[column].unique()
    return st.sidebar.multiselect(label, options=choices, default=choices)


domain_filter = _sidebar_filter('Select Domain', 'Domain')
channel_filter = _sidebar_filter('Select Channel', 'Channel')
sentiment_filter = _sidebar_filter('Select Sentiment', 'Sentiment')
discrimination_filter = _sidebar_filter('Select Discrimination', 'Discrimination')

# Apply the filters: a row is kept only when it matches every selection.
_domain_ok = df['Domain'].isin(domain_filter)
_channel_ok = df['Channel'].isin(channel_filter)
_sentiment_ok = df['Sentiment'].isin(sentiment_filter)
_discrimination_ok = df['Discrimination'].isin(discrimination_filter)
df_filtered = df[_domain_ok & _channel_ok & _sentiment_ok & _discrimination_ok]

# Render the dashboard with filtered data
render_dashboard(df_filtered)
|