ivcvy committed on
Commit
6ed1df5
·
verified ·
1 Parent(s): 6a49825

initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/adhd_clean.json filter=lfs diff=lfs merge=lfs -text
37
+ data/ocd_clean.json filter=lfs diff=lfs merge=lfs -text
.streamlit/config.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [theme]
2
+ base="light"
app.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from datasets import load_dataset
3
+ import json
4
+ from wordcloud import WordCloud
5
+ import matplotlib.pyplot as plt
6
+ import networkx as nx
7
+ from pyvis.network import Network
8
+ import streamlit.components.v1 as components
9
+
10
# main layout
# Pixel height passed to the post table so it fills the left column.
HEIGHT = 800
st.set_page_config(layout="wide")
st.title("Reddit mental map 🧠")
col1, col2, col3 = st.columns([1, 1, 2])
with col2:
    # Three stacked containers, filled further down with the word clouds.
    upper_panel = st.container()
    middle_panel = st.container()
    lower_panel = st.container()

st.sidebar.title("Reddit mental map 🧠")

st.sidebar.write("This app is a mental map of Reddit posts related to:")
st.sidebar.markdown(
    """
    - Attention-deficit/hyperactivity disorder (ADHD)
    - Aspergers
    - Depression
    - Obsessive-compulsive disorder (OCD)
    - Post-traumatic stress disorder (PTSD)
    """
)

st.sidebar.write(
    "The map aims to display a glimpse of :red-background[personal point of views of people who navigate through their mental wellbeing journey]."
)

st.sidebar.header("Update mental map ✨")
condition = st.sidebar.selectbox(
    "Select a condition", ["ADHD", "Aspergers", "Depression", "OCD", "PTSD"]
)

st.sidebar.header("References:")
# Markdown inline links need "(" directly after "]"; a space in between
# leaves the bracketed text and the URL as plain text instead of a link.
st.sidebar.markdown(
    "Hugging Face datasets: [reddit_mental_health_posts](https://huggingface.co/datasets/solomonk/reddit_mental_health_posts)"
)
st.sidebar.markdown(
    "Semantic role labeling code adapted from [FS Ndzomga's Medium](https://medium.com/thoughts-on-machine-learning/building-knowledge-graphs-with-spacy-networkx-and-matplotlib-a-glimpse-into-semantic-role-e49c9dbe26b4)"
)
49
+
50
# data loader
# Sidebar label -> (subreddit name as spelled in the dataset, SRL results file).
CONDITION_SOURCES = {
    "ADHD": ("ADHD", "data/adhd_clean.json"),
    "Aspergers": ("aspergers", "data/aspergers_clean.json"),
    "Depression": ("depression", "data/depression_clean.json"),
    "OCD": ("OCD", "data/ocd_clean.json"),
    "PTSD": ("ptsd", "data/ptsd_clean.json"),
}


@st.cache_data
def load_posts(subreddit):
    """Return the "train" rows of the Reddit dataset for one subreddit.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached per subreddit instead of reloading the dataset and
    converting it to pandas on each rerun.
    """
    posts = load_dataset("solomonk/reddit_mental_health_posts")["train"].to_pandas()
    return posts[posts["subreddit"] == subreddit]


@st.cache_data
def load_srl(path):
    """Read one SRL results file and pre-join its text per semantic role.

    Returns a ``(records, role_text)`` pair: ``records`` is the parsed JSON
    list, ``role_text`` maps "subjects" / "verbs" / "objects" to all values
    of that role joined with single spaces. Records that lack a role key are
    skipped for that role.
    """
    with open(path, "r", encoding="utf-8") as f:
        records = json.load(f)
    role_text = {
        role: " ".join(
            value for record in records if role in record for value in record[role]
        )
        for role in ("subjects", "verbs", "objects")
    }
    return records, role_text


subreddit, json_file = CONDITION_SOURCES[condition]
df = load_posts(subreddit)
srl_results, role_text = load_srl(json_file)

# Word-cloud inputs, one string per semantic role.
subjects = role_text["subjects"]
verbs = role_text["verbs"]
objects = role_text["objects"]
80
+
81
# dataframe
with col1:
    # Keep only posts whose body is not one of the two placeholder strings.
    is_placeholder = df["body"].isin(["[removed]", "[deleted]"])
    body = df["body"][~is_placeholder]
    # Selecting a row reruns the script; the selection is read from `event`.
    event = st.dataframe(
        body,
        height=HEIGHT,
        hide_index=True,
        use_container_width=True,
        selection_mode="single-row",
        on_select="rerun",
    )
92
+
93
+
94
# word cloud
# Words excluded from every word cloud (passed to WordCloud as `stopwords`).
stopwords = (
    "day hour hours know month talk thing things think time try want year"
).split()
110
+
111
+
112
def generate_better_wordcloud(data, mask=None):
    """Render the text in *data* as a word cloud on a new Matplotlib figure.

    Args:
        data: Text passed to ``WordCloud.generate_from_text``.
        mask: Optional mask forwarded unchanged to ``WordCloud``.

    Returns:
        The new figure, showing the cloud image with its axes hidden.
    """
    cloud_options = {
        "scale": 3,
        "max_words": 150,
        "colormap": "RdGy",
        "mask": mask,
        "background_color": "white",
        "stopwords": stopwords,
        "collocations": True,
    }
    rendered = WordCloud(**cloud_options).generate_from_text(data)

    fig, ax = plt.subplots()
    ax.imshow(rendered)
    ax.axis("off")
    return fig
126
+
127
+
128
# One word cloud per semantic role, stacked top to bottom in the panels.
for panel, heading, role_words in (
    (upper_panel, "Subjects", subjects),
    (middle_panel, "Verbs", verbs),
    (lower_panel, "Objects", objects),
):
    with panel:
        st.subheader(heading)
        cloud_fig = generate_better_wordcloud(role_words)
        st.pyplot(cloud_fig)
        # The script reruns on every interaction and each run creates new
        # figures; close them once rendered so pyplot does not keep them all.
        plt.close(cloud_fig)
142
+
143
+
144
+ # network
145
def build_and_plot_knowledge_graph_pyvis(result):
    """Draw the subject -> object graph of one SRL record on the current figure.

    Every subject is linked to every object and every indirect object, and
    each edge is labelled with a verb. The graph is a ``DiGraph``, which
    holds a single edge per node pair, so when a record has several verbs
    the label that remains is the last verb in the list.

    Args:
        result: Mapping with optional list values under "subjects", "verbs",
            "objects" and "indirect_objects". A missing key is treated as an
            empty list, matching how the word-cloud text is assembled.

    Returns:
        The current pyplot figure, which now contains the drawing. The caller
        is responsible for clearing that figure beforehand.
    """
    graph = nx.DiGraph()

    role_subjects = result.get("subjects", [])
    role_verbs = result.get("verbs", [])
    role_objects = result.get("objects", [])
    role_indirect_objects = result.get("indirect_objects", [])

    for subject in role_subjects:
        for verb in role_verbs:
            for obj in role_objects:
                graph.add_edge(subject, obj, label=verb)
            for ind_obj in role_indirect_objects:
                graph.add_edge(subject, ind_obj, label=verb)

    # Fixed seed keeps the layout stable across Streamlit reruns.
    pos = nx.spring_layout(graph, seed=42, k=0.5, iterations=50)

    nx.draw(
        graph,
        pos,
        with_labels=True,
        node_color="#FF746C",
        node_size=2000,
        font_size=12,
        font_color="black",
        font_weight="normal",
        arrows=True,
    )

    edge_labels = nx.get_edge_attributes(graph, "label")
    nx.draw_networkx_edge_labels(graph, pos, edge_labels=edge_labels)

    # The pyvis Network that used to be built here was never rendered or
    # returned, so it has been dropped; only the Matplotlib drawing is used.
    return plt.gcf()
183
+
184
+
185
with col3:
    st.subheader("Mental map")
    st.write(
        "This is a 2D knowledge graph from simple :red-background[semantic role labeling] of the reddit post using spaCy, NetworkX, and Matplotlib. :red-background[Select a row to display the mental map of the individual post]. The graph shows the relationship between the subject, verb, and object at singular level, to complement the full-level overview of the word clouds. It takes a moment to load the data and if the image does not show, it is because some of the posts are deleted or removed in the original dataset."
    )

    # No selection is the normal initial state, so test for it instead of
    # relying on an exception.
    selected_rows = event.selection.rows
    if selected_rows:
        person = int(selected_rows[0])
        try:
            # The graph is drawn on the current pyplot figure; start clean.
            plt.clf()
            fign = build_and_plot_knowledge_graph_pyvis(srl_results[person])
        except (IndexError, KeyError):
            # No SRL record at this position, or the record lacks a role key.
            st.info("No mental map is available for the selected post.")
        else:
            st.pyplot(fign)
data/adhd_clean.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d87a774ba07160c0952e04843dc5e3a4d6f1e839e34017c40916e683b9d91668
3
+ size 10709968
data/aspergers_clean.json ADDED
The diff for this file is too large to render. See raw diff
 
data/data_selection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
data/depression_clean.json ADDED
The diff for this file is too large to render. See raw diff
 
data/ocd_clean.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa2968cfeac02e40ad9c6c36b68f3e0b96b0ef174cb823b0632e405727b3dd81
3
+ size 10557080
data/ptsd_clean.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohappyeyeballs==2.4.3
2
+ aiohttp==3.10.10
3
+ aiosignal==1.3.1
4
+ altair==5.4.1
5
+ annotated-types==0.7.0
6
+ appnope==0.1.4
7
+ asttokens==2.4.1
8
+ async-timeout==4.0.3
9
+ attrs==24.2.0
10
+ blinker==1.8.2
11
+ blis==1.0.1
12
+ cachetools==5.5.0
13
+ catalogue==2.0.10
14
+ certifi==2024.8.30
15
+ charset-normalizer==3.4.0
16
+ click==8.1.7
17
+ cloudpathlib==0.19.0
18
+ comm==0.2.2
19
+ concepcy==0.1.0
20
+ confection==0.1.5
21
+ contourpy==1.3.0
22
+ cycler==0.12.1
23
+ cymem==2.0.8
24
+ datasets==3.0.1
25
+ debugpy==1.8.7
26
+ decorator==5.1.1
27
+ dill==0.3.8
28
+ en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
29
+ exceptiongroup==1.2.2
30
+ executing==2.1.0
31
+ filelock==3.16.1
32
+ fonttools==4.54.1
33
+ frozenlist==1.4.1
34
+ fsspec==2024.6.1
35
+ gitdb==4.0.11
36
+ GitPython==3.1.43
37
+ huggingface-hub==0.25.2
38
+ idna==3.10
39
+ ipykernel==6.29.5
40
+ ipython==8.28.0
41
+ jedi==0.19.1
42
+ Jinja2==3.1.4
43
+ jsonpickle==3.3.0
44
+ jsonschema==4.23.0
45
+ jsonschema-specifications==2024.10.1
46
+ jupyter_client==8.6.3
47
+ jupyter_core==5.7.2
48
+ kiwisolver==1.4.7
49
+ langcodes==3.4.1
50
+ language_data==1.2.0
51
+ marisa-trie==1.2.1
52
+ markdown-it-py==3.0.0
53
+ MarkupSafe==3.0.1
54
+ matplotlib==3.9.2
55
+ matplotlib-inline==0.1.7
56
+ mdurl==0.1.2
57
+ multidict==6.1.0
58
+ multiprocess==0.70.16
59
+ murmurhash==1.0.10
60
+ narwhals==1.9.3
61
+ nest-asyncio==1.6.0
62
+ networkx==3.4.1
63
+ numpy==2.0.2
64
+ packaging==24.1
65
+ pandas==2.2.3
66
+ parso==0.8.4
67
+ pexpect==4.9.0
68
+ pillow==10.4.0
69
+ platformdirs==4.3.6
70
+ preshed==3.0.9
71
+ prompt_toolkit==3.0.48
72
+ propcache==0.2.0
73
+ protobuf==5.28.2
74
+ psutil==6.0.0
75
+ ptyprocess==0.7.0
76
+ pure_eval==0.2.3
77
+ pyarrow==17.0.0
78
+ pydantic==1.10.18
79
+ pydantic_core==2.23.4
80
+ pydeck==0.9.1
81
+ Pygments==2.18.0
82
+ pyparsing==3.2.0
83
+ python-dateutil==2.9.0.post0
84
+ pytz==2024.2
85
+ pyvis==0.3.2
86
+ PyYAML==6.0.2
87
+ pyzmq==26.2.0
88
+ referencing==0.35.1
89
+ request-boost==0.6
90
+ requests==2.32.3
91
+ rich==13.9.2
92
+ rpds-py==0.20.0
93
+ shellingham==1.5.4
94
+ six==1.16.0
95
+ smart-open==7.0.5
96
+ smmap==5.0.1
97
+ spacy==3.8.2
98
+ spacy-legacy==3.0.12
99
+ spacy-loggers==1.0.5
100
+ srsly==2.4.8
101
+ stack-data==0.6.3
102
+ streamlit==1.39.0
103
+ tenacity==9.0.0
104
+ thinc==8.3.2
105
+ toml==0.10.2
106
+ tornado==6.4.1
107
+ tqdm==4.66.5
108
+ traitlets==5.14.3
109
+ typer==0.12.5
110
+ typing_extensions==4.12.2
111
+ tzdata==2024.2
112
+ urllib3==2.2.3
113
+ wasabi==1.1.3
114
+ wcwidth==0.2.13
115
+ weasel==0.4.1
116
+ wordcloud==1.9.3
117
+ wrapt==1.16.0
118
+ xxhash==3.5.0
119
+ yarl==1.15.3