# Streamlit app — "Reddit mental map" (Hugging Face Space)
import json

import matplotlib.pyplot as plt
import networkx as nx
import streamlit as st
import streamlit.components.v1 as components
from datasets import load_dataset
from pyvis.network import Network
from wordcloud import WordCloud

# main layout
HEIGHT = 800  # pixel height of the posts table in the left column

st.set_page_config(layout="wide")
st.title("Reddit mental map 🧠")

# Three columns: posts table | stacked word clouds | knowledge graph.
col1, col2, col3 = st.columns([1, 1, 2])
with col2:
    # One container per semantic role (subjects / verbs / objects).
    upper_panel = st.container()
    middle_panel = st.container()
    lower_panel = st.container()

# Sidebar: description, condition picker, references.
with st.sidebar:
    st.title("Reddit mental map 🧠")
    st.write("This app is a mental map of Reddit posts related to:")
    st.markdown(
        """
- Attention-deficit/hyperactivity disorder (ADHD)
- Aspergers
- Depression
- Obsessive-compulsive disorder (OCD)
- Post-traumatic stress disorder (PTSD)
"""
    )
    st.write(
        "The map aims to display a glimpse of :red-background[personal point of views of people who navigate through their mental wellbeing journey]."
    )
    st.header("Update mental map ✨")
    condition = st.selectbox(
        "Select a condition", ["ADHD", "Aspergers", "Depression", "OCD", "PTSD"]
    )
    st.header("References:")
    st.markdown(
        "Hugging Face datasets: [reddit_mental_health_posts] (https://huggingface.co/datasets/solomonk/reddit_mental_health_posts)"
    )
    st.markdown(
        "Semantic role labeling code adapted from [FS Ndzomga's Medium] (https://medium.com/thoughts-on-machine-learning/building-knowledge-graphs-with-spacy-networkx-and-matplotlib-a-glimpse-into-semantic-role-e49c9dbe26b4)"
    )
# data loader
# Map each UI condition to its (subreddit name, pre-computed SRL results file).
# Replaces a repetitive if/elif chain; an unknown condition now fails loudly
# with a KeyError instead of a NameError on an unbound json_file.
_CONDITION_SOURCES = {
    "ADHD": ("ADHD", "data/adhd_clean.json"),
    "Aspergers": ("aspergers", "data/aspergers_clean.json"),
    "Depression": ("depression", "data/depression_clean.json"),
    "OCD": ("OCD", "data/ocd_clean.json"),
    "PTSD": ("ptsd", "data/ptsd_clean.json"),
}

dataset = load_dataset("solomonk/reddit_mental_health_posts")
df = dataset["train"].to_pandas()

subreddit, json_file = _CONDITION_SOURCES[condition]
df = df[df["subreddit"] == subreddit]
with open(json_file, "r") as f:  # Change by diagnosis
    srl_results = json.load(f)

# Flatten each semantic-role field across all posts into one text blob,
# ready to feed the word clouds below. Entries missing a key are skipped.
subjects = " ".join(
    value for d in srl_results if "subjects" in d for value in d["subjects"]
)
verbs = " ".join(value for d in srl_results if "verbs" in d for value in d["verbs"])
objects = " ".join(
    value for d in srl_results if "objects" in d for value in d["objects"]
)
# dataframe
with col1:
    # Hide posts whose body was removed/deleted upstream.
    keep_mask = ~df["body"].isin(["[removed]", "[deleted]"])
    body = df["body"][keep_mask]
    # Single-row selection drives the knowledge graph in col3.
    event = st.dataframe(
        body,
        hide_index=True,
        height=HEIGHT,
        use_container_width=True,
        selection_mode="single-row",
        on_select="rerun",
    )
# word cloud
# Generic, low-signal words excluded from every word cloud.
stopwords = (
    "day hour hours know month talk thing things think time try want year".split()
)
def generate_better_wordcloud(data, mask=None):
    """Render a text blob as a word-cloud matplotlib figure.

    Args:
        data: Whitespace-joined text fed to ``WordCloud.generate_from_text``.
        mask: Optional image array constraining the cloud's shape.

    Returns:
        The matplotlib ``Figure`` containing the rendered cloud.
    """
    cloud = WordCloud(
        scale=3,
        max_words=150,
        colormap="RdGy",
        mask=mask,
        background_color="white",
        stopwords=stopwords,  # module-level list defined above
        collocations=True,
    ).generate_from_text(data)
    # Use the object-oriented Axes API instead of the stateful pyplot
    # calls so the figure does not depend on pyplot's implicit
    # "current figure" global (safer across Streamlit reruns).
    fig, ax = plt.subplots()
    ax.imshow(cloud)
    ax.axis("off")
    return fig
# Render one word cloud per semantic role, stacked in the middle column.
for panel, heading, blob in (
    (upper_panel, "Subjects", subjects),
    (middle_panel, "Verbs", verbs),
    (lower_panel, "Objects", objects),
):
    with panel:
        st.subheader(heading)
        st.pyplot(generate_better_wordcloud(blob))
# network
def build_and_plot_knowledge_graph_pyvis(result):
    """Draw a knowledge graph for one post's semantic-role triples.

    Every subject is connected to every object and indirect object, with
    each edge labelled by a verb (later verbs overwrite earlier labels on
    the same edge, as in the original adaptation).

    Args:
        result: Dict with "subjects", "verbs" and "objects" lists, and
            optionally "indirect_objects".

    Returns:
        The current matplotlib ``Figure`` containing the drawn graph.
    """
    G = nx.DiGraph()
    subjects = result["subjects"]
    verbs = result["verbs"]
    objects = result["objects"]
    # Some SRL entries have no indirect objects; don't crash on the key.
    indirect_objects = result.get("indirect_objects", [])
    for subject in subjects:
        for verb in verbs:
            for obj in objects:
                G.add_edge(subject, obj, label=verb)
            for ind_obj in indirect_objects:
                G.add_edge(subject, ind_obj, label=verb)
    pos = nx.spring_layout(G, seed=42, k=0.5, iterations=50)
    nx.draw(
        G,
        pos,
        with_labels=True,
        node_color="#FF746C",
        node_size=2000,
        font_size=12,
        font_color="black",
        font_weight="normal",
        arrows=True,
    )
    edge_labels = nx.get_edge_attributes(G, "label")
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
    # NOTE(review): a pyvis Network was previously built here from G but
    # never displayed, saved, or returned — dead code, removed. Only the
    # matplotlib rendering is used by the caller.
    fig = plt.gcf()
    return fig
with col3:
    st.subheader("Mental map")
    st.write(
        "This is a 2D knowledge graph from simple :red-background[semantic role labeling] of the reddit post using spaCy, NetworkX, and Matplotlib. :red-background[Tick the box next to the row of interest to display the mental map of the individual post]. The graph shows the relationship between the subject, verb, and object at singular level, to complement the full-level overview of the word clouds. It takes a moment to load the data and if the image does not show, it is because some of the posts are deleted or removed in the original dataset."
    )
    # selection_mode="single-row" yields at most one selected row; nothing
    # is selected until the user ticks a checkbox, so guard explicitly
    # instead of swallowing the IndexError with a bare except.
    if event.selection.rows:
        person = int(event.selection.rows[0])
        plt.clf()  # clear leftover pyplot state from the previous rerun
        try:
            fign = build_and_plot_knowledge_graph_pyvis(srl_results[person])
        except (KeyError, IndexError):
            # Deleted/removed posts may lack an SRL entry — skip quietly,
            # matching the caveat in the description text above.
            pass
        else:
            st.pyplot(fign)