"""Streamlit app that shows a "mental map" of Reddit mental-health posts.

The page has three columns:

* left   - the posts of the selected subreddit, one selectable row per post;
* middle - word clouds of the subjects, verbs and objects found by a
  pre-computed semantic role labeling (SRL) pass stored as JSON;
* right  - a small knowledge graph for the single post the user selected.
"""

import json

import matplotlib.pyplot as plt
import networkx as nx
import streamlit as st

# NOTE(review): `components` and `Network` are not referenced by the code
# visible in this file; the imports are kept in case something else needs them.
import streamlit.components.v1 as components  # noqa: F401
from datasets import load_dataset
from pyvis.network import Network  # noqa: F401
from wordcloud import WordCloud

# main layout
HEIGHT = 800
DATASET_NAME = "solomonk/reddit_mental_health_posts"

# Sidebar label -> (value of the dataset's "subreddit" column, SRL JSON file).
CONDITIONS = {
    "ADHD": ("ADHD", "data/adhd_clean.json"),
    "Aspergers": ("aspergers", "data/aspergers_clean.json"),
    "Depression": ("depression", "data/depression_clean.json"),
    "OCD": ("OCD", "data/ocd_clean.json"),
    "PTSD": ("ptsd", "data/ptsd_clean.json"),
}

# Words left out of the word clouds. Passing `stopwords` to WordCloud replaces
# its built-in list, so only these words are filtered.
stopwords = [
    "day",
    "hour",
    "hours",
    "know",
    "month",
    "talk",
    "thing",
    "things",
    "think",
    "time",
    "try",
    "want",
    "year",
]

# Must stay the first Streamlit command that is executed.
st.set_page_config(layout="wide")


@st.cache_data
def load_posts(subreddit):
    """Return the rows of the Reddit dataset that belong to *subreddit*.

    Cached because Streamlit re-runs the whole script on every interaction
    (including each row selection), and loading plus converting the dataset
    is by far the slowest step.
    """
    posts = load_dataset(DATASET_NAME)["train"].to_pandas()
    return posts[posts["subreddit"] == subreddit]


@st.cache_data
def load_srl_results(json_file):
    """Return the parsed semantic-role-labeling results stored in *json_file*."""
    with open(json_file, "r", encoding="utf-8") as f:
        return json.load(f)


def join_role_words(results, role):
    """Join every word stored under *role* in *results* into one string.

    Entries that do not contain the *role* key are skipped.
    """
    return " ".join(
        word for entry in results if role in entry for word in entry[role]
    )


def generate_better_wordcloud(data, mask=None):
    """Render a word cloud of the text *data* and return its figure.

    Args:
        data: Whitespace-separated text to draw.
        mask: Optional mask forwarded to ``WordCloud``.

    Returns:
        A new Matplotlib figure showing the cloud with the axes hidden.
        The caller owns the figure and should close it after rendering.

    Raises:
        ValueError: Raised by ``WordCloud`` when *data* contains no usable
            word.
    """
    cloud = WordCloud(
        scale=3,
        max_words=150,
        colormap="RdGy",
        mask=mask,
        background_color="white",
        stopwords=stopwords,
        collocations=True,
    ).generate_from_text(data)

    fig, ax = plt.subplots()
    ax.imshow(cloud)
    ax.axis("off")
    return fig


def show_wordcloud(panel, title, text):
    """Draw the word cloud of *text* under the heading *title* inside *panel*."""
    with panel:
        st.subheader(title)
        try:
            fig = generate_better_wordcloud(text)
        except ValueError:
            # WordCloud refuses to draw when no word is left to plot.
            st.info("Not enough words to draw a word cloud.")
            return
        st.pyplot(fig)
        # Streamlit has already rendered the image; release the figure so
        # reruns do not accumulate open figures.
        plt.close(fig)


# network
def build_and_plot_knowledge_graph_pyvis(result):
    """Draw the subject -> object graph of one SRL result and return the figure.

    Every subject is linked to every object and every indirect object, once
    per verb, with the verb as the edge label. The graph is a ``DiGraph``,
    which keeps a single edge per node pair, so when several verbs connect the
    same pair the last one is the label that remains.

    Args:
        result: Mapping with the optional keys ``"subjects"``, ``"verbs"``,
            ``"objects"`` and ``"indirect_objects"``; a missing key is treated
            as an empty list.

    Returns:
        A new Matplotlib figure containing the drawing. The caller owns the
        figure and should close it after rendering.
    """
    subject_nodes = result.get("subjects", [])
    verb_labels = result.get("verbs", [])
    # Direct objects first, then indirect ones: the insertion order decides
    # which label survives when the same pair is linked more than once.
    target_nodes = [
        *result.get("objects", []),
        *result.get("indirect_objects", []),
    ]

    graph = nx.DiGraph()
    for subject in subject_nodes:
        for verb in verb_labels:
            for target in target_nodes:
                graph.add_edge(subject, target, label=verb)

    # Fixed seed so the same post always produces the same layout.
    pos = nx.spring_layout(graph, seed=42, k=0.5, iterations=50)

    # Draw on a dedicated figure instead of whatever figure is current.
    fig, ax = plt.subplots()
    nx.draw(
        graph,
        pos,
        ax=ax,
        with_labels=True,
        node_color="#FF746C",
        node_size=2000,
        font_size=12,
        font_color="black",
        font_weight="normal",
        arrows=True,
    )
    edge_labels = nx.get_edge_attributes(graph, "label")
    nx.draw_networkx_edge_labels(graph, pos, edge_labels=edge_labels, ax=ax)
    return fig


st.title("Reddit mental map 🧠")
col1, col2, col3 = st.columns([1, 1, 2])
with col2:
    upper_panel = st.container()
    middle_panel = st.container()
    lower_panel = st.container()

st.sidebar.title("Reddit mental map 🧠")
st.sidebar.write("This app is a mental map of Reddit posts related to:")
st.sidebar.markdown(
    """
- Attention-deficit/hyperactivity disorder (ADHD)
- Aspergers
- Depression
- Obsessive-compulsive disorder (OCD)
- Post-traumatic stress disorder (PTSD)
"""
)
st.sidebar.write(
    "The map aims to display a glimpse of :red-background[personal point of views of people who navigate through their mental wellbeing journey]."
)
st.sidebar.header("Update mental map ✨")
condition = st.sidebar.selectbox("Select a condition", list(CONDITIONS))
st.sidebar.header("References:")
# A Markdown link needs "(" directly after "]"; a space in between breaks it.
st.sidebar.markdown(
    "Hugging Face datasets: [reddit_mental_health_posts](https://huggingface.co/datasets/solomonk/reddit_mental_health_posts)"
)
st.sidebar.markdown(
    "Semantic role labeling code adapted from [FS Ndzomga's Medium](https://medium.com/thoughts-on-machine-learning/building-knowledge-graphs-with-spacy-networkx-and-matplotlib-a-glimpse-into-semantic-role-e49c9dbe26b4)"
)

# data loader
subreddit, json_file = CONDITIONS[condition]
df = load_posts(subreddit)
srl_results = load_srl_results(json_file)

subjects = join_role_words(srl_results, "subjects")
verbs = join_role_words(srl_results, "verbs")
objects = join_role_words(srl_results, "objects")

# dataframe
with col1:
    # Posts whose text was removed or deleted carry no content to show.
    body = df["body"][~df["body"].isin(["[removed]", "[deleted]"])]
    event = st.dataframe(
        body,
        use_container_width=True,
        height=HEIGHT,
        hide_index=True,
        on_select="rerun",
        selection_mode="single-row",
    )

# word cloud
show_wordcloud(upper_panel, "Subjects", subjects)
show_wordcloud(middle_panel, "Verbs", verbs)
show_wordcloud(lower_panel, "Objects", objects)

with col3:
    st.subheader("Mental map")
    st.write(
        "This is a 2D knowledge graph from simple "
        ":red-background[semantic role labeling] of the reddit post using "
        "spaCy, NetworkX, and Matplotlib. "
        ":red-background[Tick the box next to the row of interest to display "
        "the mental map of the individual post]. "
        "The graph shows the relationship between the subject, verb, and "
        "object at singular level, to complement the full-level overview of "
        "the word clouds. It takes a moment to load the data and if the image "
        "does not show, it is because some of the posts are deleted or removed "
        "in the original dataset."
    )

    selected_rows = event.selection.rows
    # Nothing is drawn until the user ticks a row.
    if selected_rows:
        # NOTE(review): the row position in the displayed table is used
        # directly as the index into srl_results - presumably the JSON was
        # produced from the same filtered list of posts; confirm.
        person = int(selected_rows[0])
        if person >= len(srl_results):
            st.warning(
                "No semantic role labeling result is available for the "
                "selected post."
            )
        else:
            try:
                fign = build_and_plot_knowledge_graph_pyvis(srl_results[person])
            except Exception as err:  # UI boundary: report instead of hiding
                st.warning(
                    f"Could not draw the mental map for the selected post: {err}"
                )
            else:
                st.pyplot(fign)
                plt.close(fign)