# Hugging Face Space: ivcvy/reddit-mental-map (status: Sleeping; file size: 5,652 bytes)

import json

import matplotlib.pyplot as plt
import networkx as nx
import streamlit as st
import streamlit.components.v1 as components
from datasets import load_dataset
from matplotlib.figure import Figure
from pyvis.network import Network
from wordcloud import WordCloud

# main layout
HEIGHT = 800  # height handed to the posts table further down
st.set_page_config(layout="wide")
st.title("Reddit mental map 🧠")

# Three columns at a 1:1:2 width ratio. The middle column is split into three
# stacked containers that are filled in later in the script.
col1, col2, col3 = st.columns([1, 1, 2])
with col2:
    upper_panel, middle_panel, lower_panel = (st.container() for _ in range(3))

# Sidebar: app description followed by the condition selector. Everything
# created inside the ``with`` block is placed in the sidebar.
with st.sidebar:
    st.title("Reddit mental map 🧠")

    st.write("This app is a mental map of Reddit posts related to:")
    # The leading whitespace is part of the original Markdown text and is
    # reproduced verbatim.
    st.markdown(
        "\n"
        "                    - Attention-deficit/hyperactivity disorder (ADHD)\n"
        "                    - Aspergers\n"
        "                    - Depression\n"
        "                    - Obsessive-compulsive disorder (OCD)\n"
        "                    - Post-traumatic stress disorder (PTSD)\n"
        "    "
    )

    st.write(
        "The map aims to display a glimpse of :red-background[personal point of views of people who navigate through their mental wellbeing journey]."
    )

    st.header("Update mental map ✨")
    # The chosen label decides which subreddit and which SRL file are loaded.
    condition = st.selectbox(
        "Select a condition", ["ADHD", "Aspergers", "Depression", "OCD", "PTSD"]
    )

# Sidebar references. A Markdown inline link needs "(url)" to follow "]"
# directly; with a space in between the text is shown literally instead of
# being rendered as a link.
st.sidebar.header("References:")
st.sidebar.markdown(
    "Hugging Face datasets: [reddit_mental_health_posts](https://huggingface.co/datasets/solomonk/reddit_mental_health_posts)"
)
st.sidebar.markdown(
    "Semantic role labeling code adapted from [FS Ndzomga's Medium](https://medium.com/thoughts-on-machine-learning/building-knowledge-graphs-with-spacy-networkx-and-matplotlib-a-glimpse-into-semantic-role-e49c9dbe26b4)"
)

# data loader
# Maps each sidebar condition to the value of the dataset's "subreddit" column
# and to the JSON file holding that subreddit's semantic-role-labeling output.
_CONDITION_SOURCES = {
    "ADHD": ("ADHD", "data/adhd_clean.json"),
    "Aspergers": ("aspergers", "data/aspergers_clean.json"),
    "Depression": ("depression", "data/depression_clean.json"),
    "OCD": ("OCD", "data/ocd_clean.json"),
    "PTSD": ("ptsd", "data/ptsd_clean.json"),
}


@st.cache_data
def _load_subreddit_posts(subreddit: str):
    """Return the rows of the "train" split whose subreddit equals *subreddit*.

    Streamlit re-executes the whole script on every widget interaction, so the
    result is cached per subreddit instead of reloading and converting the
    dataset on each rerun.
    """
    posts = load_dataset("solomonk/reddit_mental_health_posts")["train"].to_pandas()
    return posts[posts["subreddit"] == subreddit]


@st.cache_data
def _load_srl_results(path: str):
    """Parse and return the JSON document stored at *path* (cached per path)."""
    # Explicit encoding so the result does not depend on the platform default.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _join_role_values(results, role: str) -> str:
    """Join every value listed under *role* into one space-separated string.

    Entries that do not contain *role* are skipped.
    """
    return " ".join(
        value for entry in results if role in entry for value in entry[role]
    )


subreddit, json_file = _CONDITION_SOURCES[condition]
df = _load_subreddit_posts(subreddit)
srl_results = _load_srl_results(json_file)

# Text fed to the three word clouds.
subjects = _join_role_values(srl_results, "subjects")
verbs = _join_role_values(srl_results, "verbs")
objects = _join_role_values(srl_results, "objects")

# dataframe
with col1:
    # Hide posts whose body is only a moderation placeholder.
    placeholder_bodies = ["[removed]", "[deleted]"]
    is_placeholder = df["body"].isin(placeholder_bodies)
    body = df["body"][~is_placeholder]

    # Single-row selection triggers a rerun; the selection is read from
    # ``event`` when the mental map is drawn.
    # NOTE(review): the selected row position is later used to index
    # ``srl_results`` - confirm the JSON entries follow the same order as this
    # filtered view.
    event = st.dataframe(
        body,
        height=HEIGHT,
        use_container_width=True,
        hide_index=True,
        selection_mode="single-row",
        on_select="rerun",
    )


# word cloud
# Extra filler words passed to every word cloud as its stop-word list.
stopwords = (
    "day hour hours know month talk thing things think time try want year"
).split()


def generate_better_wordcloud(data, mask=None):
    """Render *data* as a word cloud and return it as a Matplotlib figure.

    Args:
        data: Text to build the cloud from.
        mask: Optional mask forwarded unchanged to ``WordCloud``.

    Returns:
        A ``matplotlib.figure.Figure`` showing the cloud with the axes hidden.
    """
    cloud = WordCloud(
        scale=3,
        max_words=150,
        colormap="RdGy",
        mask=mask,
        background_color="white",
        stopwords=stopwords,
        collocations=True,
    ).generate_from_text(data)

    # Build the figure directly instead of through pyplot: pyplot keeps every
    # figure it creates alive until it is explicitly closed, which leaks one
    # figure per call in a script that is re-run on each interaction.
    fig = Figure()
    ax = fig.subplots()
    ax.imshow(cloud)
    ax.axis("off")
    return fig


# One word cloud per semantic role, stacked top to bottom in the middle column.
for panel, heading, text in (
    (upper_panel, "Subjects", subjects),
    (middle_panel, "Verbs", verbs),
    (lower_panel, "Objects", objects),
):
    with panel:
        st.subheader(heading)
        fig = generate_better_wordcloud(text)
        st.pyplot(fig)
        # The image has already been rendered; close the figure so it does not
        # stay registered with pyplot and pile up across script reruns.
        plt.close(fig)


# network
def build_and_plot_knowledge_graph_pyvis(result):
    """Draw the subject -> object graph of one SRL result.

    Every subject is connected to every object and indirect object. Because
    each subject/target pair is linked once per verb, the edge label lists all
    distinct verbs (in first-seen order) rather than only the last one. No
    edges are added when there are no verbs.

    The name is kept for existing callers; the figure is drawn with NetworkX
    and Matplotlib. The pyvis network that used to be built here was never
    used and has been dropped.

    Args:
        result: Mapping with optional "subjects", "verbs", "objects" and
            "indirect_objects" lists. Missing or empty roles are treated as
            empty lists.

    Returns:
        A ``matplotlib.figure.Figure`` containing the drawn graph.
    """
    subjects = result.get("subjects") or ()
    verbs = result.get("verbs") or ()
    # Objects first, then indirect objects, so nodes are added in the same
    # order as before and the seeded layout stays stable.
    targets = [
        *(result.get("objects") or ()),
        *(result.get("indirect_objects") or ()),
    ]

    graph = nx.DiGraph()
    if verbs:
        edge_label = ", ".join(map(str, dict.fromkeys(verbs)))
        for subject in subjects:
            for target in targets:
                graph.add_edge(subject, target, label=edge_label)

    # Draw on a dedicated figure so the result does not depend on (or leave
    # behind) pyplot's implicit "current figure". The axes fill the whole
    # figure, which is what nx.draw sets up on an empty pyplot figure.
    fig = Figure()
    ax = fig.add_axes((0, 0, 1, 1))

    pos = nx.spring_layout(graph, seed=42, k=0.5, iterations=50)
    nx.draw(
        graph,
        pos,
        ax=ax,
        with_labels=True,
        node_color="#FF746C",
        node_size=2000,
        font_size=12,
        font_color="black",
        font_weight="normal",
        arrows=True,
    )
    nx.draw_networkx_edge_labels(
        graph,
        pos,
        edge_labels=nx.get_edge_attributes(graph, "label"),
        ax=ax,
    )

    return fig


with col3:
    st.subheader("Mental map")
    st.write(
        "This is a 2D knowledge graph from simple :red-background[semantic role labeling] of the reddit post using spaCy, NetworkX, and Matplotlib. :red-background[Tick the box next to the row of interest to display the mental map of the individual post]. The graph shows the relationship between the subject, verb, and object at singular level, to complement the full-level overview of the word clouds. It takes a moment to load the data and if the image does not show, it is because some of the posts are deleted or removed in the original dataset."
    )

    # Nothing is drawn until a row has been ticked in the posts table.
    selected_rows = event.selection.rows
    if selected_rows:
        person = int(selected_rows[0])
        try:
            # Start from an empty current figure in case the graph is drawn
            # through pyplot's implicit state.
            plt.clf()
            fign = build_and_plot_knowledge_graph_pyvis(srl_results[person])
            st.pyplot(fign)
            plt.close(fign)
        except Exception as exc:
            # Stay best-effort, but tell the user instead of failing silently.
            # Unlike a bare ``except``, this lets KeyboardInterrupt, SystemExit
            # and other BaseException subclasses propagate.
            st.warning(f"Could not draw the mental map for this post: {exc!r}")