# ivcvy's picture
# Update app.py
# ca0820a verified
import streamlit as st
from datasets import load_dataset
import json
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import networkx as nx
from pyvis.network import Network
import streamlit.components.v1 as components
# main layout
# HEIGHT is the pixel height given to the posts table.
HEIGHT = 800

# Page configuration is issued before any other Streamlit element.
st.set_page_config(layout="wide")
st.title("Reddit mental map 🧠")

# Three columns in a 1:1:2 width ratio: posts table | word clouds | graph.
col1, col2, col3 = st.columns([1, 1, 2])

# The middle column is split into three stacked containers, one per word
# cloud (subjects, verbs, objects), created top to bottom.
upper_panel, middle_panel, lower_panel = [col2.container() for _ in range(3)]
# Sidebar: app description, condition picker and references.
st.sidebar.title("Reddit mental map 🧠")
st.sidebar.write("This app is a mental map of Reddit posts related to:")
st.sidebar.markdown(
    """
- Attention-deficit/hyperactivity disorder (ADHD)
- Aspergers
- Depression
- Obsessive-compulsive disorder (OCD)
- Post-traumatic stress disorder (PTSD)
"""
)
st.sidebar.write(
    "The map aims to display a glimpse of :red-background[personal point of views of people who navigate through their mental wellbeing journey]."
)

# The selected condition decides which subreddit and which SRL file are shown.
st.sidebar.header("Update mental map ✨")
condition = st.sidebar.selectbox(
    "Select a condition", ["ADHD", "Aspergers", "Depression", "OCD", "PTSD"]
)

# Markdown inline links need "](" with no space in between; with a space the
# bracketed text and the URL are rendered as plain text instead of a link.
st.sidebar.header("References:")
st.sidebar.markdown(
    "Hugging Face datasets: [reddit_mental_health_posts](https://huggingface.co/datasets/solomonk/reddit_mental_health_posts)"
)
st.sidebar.markdown(
    "Semantic role labeling code adapted from [FS Ndzomga's Medium](https://medium.com/thoughts-on-machine-learning/building-knowledge-graphs-with-spacy-networkx-and-matplotlib-a-glimpse-into-semantic-role-e49c9dbe26b4)"
)
# data loader
# Selectable condition -> (subreddit label as spelled in the dataset,
# path of the pre-computed semantic-role file for that condition).
CONDITION_SOURCES = {
    "ADHD": ("ADHD", "data/adhd_clean.json"),
    "Aspergers": ("aspergers", "data/aspergers_clean.json"),
    "Depression": ("depression", "data/depression_clean.json"),
    "OCD": ("OCD", "data/ocd_clean.json"),
    "PTSD": ("ptsd", "data/ptsd_clean.json"),
}


@st.cache_data
def _load_posts():
    """Return the "train" split of the Reddit dataset as a pandas DataFrame.

    Cached because Streamlit re-executes this script on every interaction
    (each row selection triggers a rerun); without the cache the dataset
    would be loaded and converted again each time.
    """
    return load_dataset("solomonk/reddit_mental_health_posts")["train"].to_pandas()


def _join_role(entries, role):
    """Join every value stored under ``role`` across ``entries`` with spaces.

    Entries that do not have the ``role`` key are skipped.
    """
    return " ".join(
        value for entry in entries if role in entry for value in entry[role]
    )


subreddit_name, json_file = CONDITION_SOURCES[condition]

# Boolean-mask filtering builds a new frame, so the cached one is not mutated.
df = _load_posts()
df = df[df["subreddit"] == subreddit_name]

# Explicit UTF-8: without it open() falls back to the platform's locale
# encoding, which can mis-decode non-ASCII text.
with open(json_file, "r", encoding="utf-8") as f:
    srl_results = json.load(f)

# One space-separated text per semantic role; these feed the word clouds.
subjects = _join_role(srl_results, "subjects")
verbs = _join_role(srl_results, "verbs")
objects = _join_role(srl_results, "objects")
# dataframe
# Hide posts whose body is only the "[removed]" / "[deleted]" placeholder.
post_bodies = df["body"]
is_placeholder = post_bodies.isin(["[removed]", "[deleted]"])
body = post_bodies[~is_placeholder]

# Single-row selection that reruns the script; the returned event carries the
# selected row(s) and is read later when drawing the mental map.
# NOTE(review): the selected row number is later used directly as an index
# into srl_results; confirm it is a position within this filtered series and
# that the two line up.
event = col1.dataframe(
    body,
    use_container_width=True,
    height=HEIGHT,
    hide_index=True,
    on_select="rerun",
    selection_mode="single-row",
)
# word cloud
# Words excluded from every word cloud (passed to WordCloud as ``stopwords``).
stopwords = (
    "day hour hours know month talk thing things think time try want year"
).split()
def generate_better_wordcloud(data, mask=None):
    """Render ``data`` as a word cloud and return the Matplotlib figure.

    Args:
        data: Text the cloud is generated from.
        mask: Optional mask forwarded unchanged to ``WordCloud``.

    Returns:
        A Matplotlib figure showing the cloud image with the axes hidden.
    """
    # Local import so this function does not depend on an extra file-level
    # import; matplotlib itself is already a dependency of this file.
    from matplotlib.figure import Figure

    cloud = WordCloud(
        scale=3,
        max_words=150,
        colormap="RdGy",
        mask=mask,
        background_color="white",
        stopwords=stopwords,
        collocations=True,
    ).generate_from_text(data)

    # Build the figure directly instead of through pyplot: plt.figure()
    # registers every figure globally and keeps it alive until it is closed
    # explicitly, so each call (three per script run) would leave one more
    # open figure behind. A standalone Figure is released once the caller
    # drops it, and it does not touch pyplot's shared "current figure".
    fig = Figure()
    ax = fig.subplots()
    ax.imshow(cloud)
    ax.axis("off")
    return fig
def _show_wordcloud(panel, heading, text):
    """Render a titled word cloud of ``text`` inside ``panel``; return its figure."""
    with panel:
        st.subheader(heading)
        cloud_fig = generate_better_wordcloud(text)
        st.pyplot(cloud_fig)
    return cloud_fig


# One cloud per semantic role, drawn top to bottom in the middle column.
figs = _show_wordcloud(upper_panel, "Subjects", subjects)
figv = _show_wordcloud(middle_panel, "Verbs", verbs)
figo = _show_wordcloud(lower_panel, "Objects", objects)
# network
def build_and_plot_knowledge_graph_pyvis(result):
    """Draw the subject -> object graph for one post's semantic roles.

    Every subject is linked to every object and every indirect object, with
    the verb as the edge label. A ``DiGraph`` holds a single edge per node
    pair, so when ``result`` contains several verbs each edge ends up
    labelled with the last one.

    The graph is rendered with NetworkX on a Matplotlib figure that this
    function creates itself, so the result does not depend on the state of
    pyplot's current figure.

    Args:
        result: Mapping with the role entries ``"subjects"``, ``"verbs"``,
            ``"objects"`` and ``"indirect_objects"``. A missing role is
            treated as empty. (Presumably each is a list of strings;
            confirm against the data files.)

    Returns:
        The Matplotlib figure containing the drawn graph.
    """
    # Local import so this function does not depend on an extra file-level
    # import; matplotlib itself is already a dependency of this file.
    from matplotlib.figure import Figure

    subjects = result.get("subjects", [])
    verbs = result.get("verbs", [])
    # Objects first, then indirect objects: the order in which edges
    # (and therefore nodes) are added for each subject/verb pair.
    targets = [*result.get("objects", []), *result.get("indirect_objects", [])]

    G = nx.DiGraph()
    for subject in subjects:
        for verb in verbs:
            for target in targets:
                G.add_edge(subject, target, label=verb)

    # Fixed seed keeps the layout identical between reruns of the script.
    pos = nx.spring_layout(G, seed=42, k=0.5, iterations=50)

    fig = Figure()
    # Axes spanning the whole figure, passed explicitly to both draw calls
    # so nothing is drawn onto whatever figure pyplot considers current.
    ax = fig.add_axes((0, 0, 1, 1))
    nx.draw(
        G,
        pos,
        ax=ax,
        with_labels=True,
        node_color="#FF746C",
        node_size=2000,
        font_size=12,
        font_color="black",
        font_weight="normal",
        arrows=True,
    )
    edge_labels = nx.get_edge_attributes(G, "label")
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, ax=ax)
    return fig
with col3:
    st.subheader("Mental map")
    st.write(
        "This is a 2D knowledge graph from simple :red-background[semantic role labeling] of the reddit post using spaCy, NetworkX, and Matplotlib. :red-background[Tick the box next to the row of interest to display the mental map of the individual post]. The graph shows the relationship between the subject, verb, and object at singular level, to complement the full-level overview of the word clouds. It takes a moment to load the data and if the image does not show, it is because some of the posts are deleted or removed in the original dataset."
    )

    # No row ticked yet is the normal initial state: draw nothing.
    selected_rows = event.selection.rows
    if selected_rows:
        person = int(selected_rows[0])
        try:
            # Start from an empty current figure before the graph is drawn.
            plt.clf()
            fign = build_and_plot_knowledge_graph_pyvis(srl_results[person])
            st.pyplot(fign)
        except Exception as exc:
            # Still best-effort (a post without a drawable graph must not
            # take the page down), but no longer silent. Unlike a bare
            # ``except:``, this leaves BaseException-based signals such as
            # KeyboardInterrupt and SystemExit alone.
            st.info(
                "No mental map could be drawn for this post "
                f"({type(exc).__name__})."
            )