Spaces:

ivcvy
/

reddit-mental-map

Sleeping

App Files Files Community

ivcvy committed on Oct 17, 2024

Commit

6ed1df5

verified ·

1 Parent(s): 6a49825

initial commit

Browse files

Files changed (10) hide show

.gitattributes +2 -0
.streamlit/config.toml +2 -0
app.py +196 -0
data/adhd_clean.json +3 -0
data/aspergers_clean.json +0 -0
data/data_selection.ipynb +0 -0
data/depression_clean.json +0 -0
data/ocd_clean.json +3 -0
data/ptsd_clean.json +0 -0
requirements.txt +119 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/adhd_clean.json filter=lfs diff=lfs merge=lfs -text
+data/ocd_clean.json filter=lfs diff=lfs merge=lfs -text

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [theme]
2	+ base="light"

app.py ADDED Viewed

	@@ -0,0 +1,196 @@

+import streamlit as st
+from datasets import load_dataset
+import json
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+import networkx as nx
+from pyvis.network import Network
+import streamlit.components.v1 as components
+# main layout
+# Pixel height passed to the posts table rendered in the first column.
+HEIGHT = 800
+st.set_page_config(layout="wide")
+st.title("Reddit mental map 🧠")
+# Three columns with relative widths 1:1:2 (posts table, word clouds, graph).
+col1, col2, col3 = st.columns([1, 1, 2])
+# Reserve three stacked containers in the middle column now; the word clouds
+# are written into them later in the script.
+with col2:
+    upper_panel = st.container()
+    middle_panel = st.container()
+    lower_panel = st.container()
+# sidebar: short description of the app and the list of covered conditions
+st.sidebar.title("Reddit mental map 🧠")
+st.sidebar.write("This app is a mental map of Reddit posts related to:")
+st.sidebar.markdown(
+    """
+                    - Attention-deficit/hyperactivity disorder (ADHD)
+                    - Aspergers
+                    - Depression
+                    - Obsessive-compulsive disorder (OCD)
+                    - Post-traumatic stress disorder (PTSD)
+    """
+)
+st.sidebar.write(
+    "The map aims to display a glimpse of :red-background[personal point of views of people who navigate through their mental wellbeing journey]."
+)
+st.sidebar.header("Update mental map ✨")
+# The chosen label decides which subreddit rows and which SRL JSON file the
+# rest of the script works with.
+condition = st.sidebar.selectbox(
+    "Select a condition", ["ADHD", "Aspergers", "Depression", "OCD", "PTSD"]
+)
+st.sidebar.header("References:")
+st.sidebar.markdown(
+    "Hugging Face datasets: [reddit_mental_health_posts] (https://huggingface.co/datasets/solomonk/reddit_mental_health_posts)"
+)
+st.sidebar.markdown(
+    "Semantic role labeling code adapted from [FS Ndzomga's Medium] (https://medium.com/thoughts-on-machine-learning/building-knowledge-graphs-with-spacy-networkx-and-matplotlib-a-glimpse-into-semantic-role-e49c9dbe26b4)"
+)
+# data loader
+dataset = load_dataset("solomonk/reddit_mental_health_posts")
+df = dataset["train"].to_pandas()
+if condition == "ADHD":
+    df = df[df["subreddit"] == "ADHD"]
+    json_file = "data/adhd_clean.json"
+elif condition == "Aspergers":
+    df = df[df["subreddit"] == "aspergers"]
+    json_file = "data/aspergers_clean.json"
+elif condition == "Depression":
+    df = df[df["subreddit"] == "depression"]
+    json_file = "data/depression_clean.json"
+elif condition == "OCD":
+    df = df[df["subreddit"] == "OCD"]
+    json_file = "data/ocd_clean.json"
+elif condition == "PTSD":
+    df = df[df["subreddit"] == "ptsd"]
+    json_file = "data/ptsd_clean.json"
+with open(json_file, "r") as f:  # Change by diagnosis
+    srl_results = json.load(f)
+subjects = " ".join(
+    value for d in srl_results if "subjects" in d for value in d["subjects"]
+)
+verbs = " ".join(value for d in srl_results if "verbs" in d for value in d["verbs"])
+objects = " ".join(
+    value for d in srl_results if "objects" in d for value in d["objects"]
+)
+# dataframe
+with col1:
+    body = df["body"][~df["body"].isin(["[removed]", "[deleted]"])]
+    event = st.dataframe(
+        body,
+        use_container_width=True,
+        height=HEIGHT,
+        hide_index=True,
+        on_select="rerun",
+        selection_mode="single-row",
+    )
+# word cloud
+stopwords = [
+    "day",
+    "hour",
+    "hours",
+    "know",
+    "month",
+    "talk",
+    "thing",
+    "things",
+    "think",
+    "time",
+    "try",
+    "want",
+    "year",
+]
+def generate_better_wordcloud(data, mask=None):
+    cloud = WordCloud(
+        scale=3,
+        max_words=150,
+        colormap="RdGy",
+        mask=mask,
+        background_color="white",
+        stopwords=stopwords,
+        collocations=True,
+    ).generate_from_text(data)
+    fig = plt.figure()
+    plt.imshow(cloud)
+    plt.axis("off")
+    return fig
+with upper_panel:
+    st.subheader("Subjects")
+    figs = generate_better_wordcloud(subjects)
+    st.pyplot(figs)
+with middle_panel:
+    st.subheader("Verbs")
+    figv = generate_better_wordcloud(verbs)
+    st.pyplot(figv)
+with lower_panel:
+    st.subheader("Objects")
+    figo = generate_better_wordcloud(objects)
+    st.pyplot(figo)
+# network
+def build_and_plot_knowledge_graph_pyvis(result):
+    G = nx.DiGraph()
+    subjects = result["subjects"]
+    verbs = result["verbs"]
+    objects = result["objects"]
+    indirect_objects = result["indirect_objects"]
+    for subject in subjects:
+        for verb in verbs:
+            for obj in objects:
+                G.add_edge(subject, obj, label=verb)
+            for ind_obj in indirect_objects:
+                G.add_edge(subject, ind_obj, label=verb)
+    pos = nx.spring_layout(G, seed=42, k=0.5, iterations=50)
+    nx.draw(
+        G,
+        pos,
+        with_labels=True,
+        node_color="#FF746C",
+        node_size=2000,
+        font_size=12,
+        font_color="black",
+        font_weight="normal",
+        arrows=True,
+    )
+    edge_labels = nx.get_edge_attributes(G, "label")
+    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
+    net = Network()
+    net.repulsion()
+    net.from_nx(G)
+    fig = plt.gcf()
+    return fig
+with col3:
+    try:
+        st.subheader("Mental map")
+        st.write(
+            "This is a 2D knowledge graph from simple :red-background[semantic role labeling] of the reddit post using spaCy, NetworkX, and Matplotlib. :red-background[Select a row to display the mental map of the individual post]. The graph shows the relationship between the subject, verb, and object at singular level, to complement the full-level overview of the word clouds. It takes a moment to load the data and if the image does not show, it is because some of the posts are deleted or removed in the original dataset."
+        )
+        person = int(event.selection.rows[0])
+        plt.clf()
+        fign = build_and_plot_knowledge_graph_pyvis(srl_results[person])
+        st.pyplot(fign)
+    except:
+        pass

data/adhd_clean.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d87a774ba07160c0952e04843dc5e3a4d6f1e839e34017c40916e683b9d91668
+size 10709968

data/aspergers_clean.json ADDED Viewed

The diff for this file is too large to render. See raw diff

data/data_selection.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

data/depression_clean.json ADDED Viewed

The diff for this file is too large to render. See raw diff

data/ocd_clean.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa2968cfeac02e40ad9c6c36b68f3e0b96b0ef174cb823b0632e405727b3dd81
+size 10557080

data/ptsd_clean.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,119 @@

+aiohappyeyeballs==2.4.3
+aiohttp==3.10.10
+aiosignal==1.3.1
+altair==5.4.1
+annotated-types==0.7.0
+appnope==0.1.4
+asttokens==2.4.1
+async-timeout==4.0.3
+attrs==24.2.0
+blinker==1.8.2
+blis==1.0.1
+cachetools==5.5.0
+catalogue==2.0.10
+certifi==2024.8.30
+charset-normalizer==3.4.0
+click==8.1.7
+cloudpathlib==0.19.0
+comm==0.2.2
+concepcy==0.1.0
+confection==0.1.5
+contourpy==1.3.0
+cycler==0.12.1
+cymem==2.0.8
+datasets==3.0.1
+debugpy==1.8.7
+decorator==5.1.1
+dill==0.3.8
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+exceptiongroup==1.2.2
+executing==2.1.0
+filelock==3.16.1
+fonttools==4.54.1
+frozenlist==1.4.1
+fsspec==2024.6.1
+gitdb==4.0.11
+GitPython==3.1.43
+huggingface-hub==0.25.2
+idna==3.10
+ipykernel==6.29.5
+ipython==8.28.0
+jedi==0.19.1
+Jinja2==3.1.4
+jsonpickle==3.3.0
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+kiwisolver==1.4.7
+langcodes==3.4.1
+language_data==1.2.0
+marisa-trie==1.2.1
+markdown-it-py==3.0.0
+MarkupSafe==3.0.1
+matplotlib==3.9.2
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+multidict==6.1.0
+multiprocess==0.70.16
+murmurhash==1.0.10
+narwhals==1.9.3
+nest-asyncio==1.6.0
+networkx==3.4.1
+numpy==2.0.2
+packaging==24.1
+pandas==2.2.3
+parso==0.8.4
+pexpect==4.9.0
+pillow==10.4.0
+platformdirs==4.3.6
+preshed==3.0.9
+prompt_toolkit==3.0.48
+propcache==0.2.0
+protobuf==5.28.2
+psutil==6.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==17.0.0
+pydantic==1.10.18
+pydantic_core==2.23.4
+pydeck==0.9.1
+Pygments==2.18.0
+pyparsing==3.2.0
+python-dateutil==2.9.0.post0
+pytz==2024.2
+pyvis==0.3.2
+PyYAML==6.0.2
+pyzmq==26.2.0
+referencing==0.35.1
+request-boost==0.6
+requests==2.32.3
+rich==13.9.2
+rpds-py==0.20.0
+shellingham==1.5.4
+six==1.16.0
+smart-open==7.0.5
+smmap==5.0.1
+spacy==3.8.2
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+srsly==2.4.8
+stack-data==0.6.3
+streamlit==1.39.0
+tenacity==9.0.0
+thinc==8.3.2
+toml==0.10.2
+tornado==6.4.1
+tqdm==4.66.5
+traitlets==5.14.3
+typer==0.12.5
+typing_extensions==4.12.2
+tzdata==2024.2
+urllib3==2.2.3
+wasabi==1.1.3
+wcwidth==0.2.13
+weasel==0.4.1
+wordcloud==1.9.3
+wrapt==1.16.0
+xxhash==3.5.0
+yarl==1.15.3