# Streamlit app — "Reddit mental map" (Hugging Face Space)
import json

import matplotlib.pyplot as plt
import networkx as nx
import streamlit as st
import streamlit.components.v1 as components
from datasets import load_dataset
from pyvis.network import Network
from wordcloud import WordCloud

# main layout
HEIGHT = 800  # pixel height of the posts table in the left column

st.set_page_config(layout="wide")
st.title("Reddit mental map 🧠")

# Three columns: posts table | stacked word clouds | knowledge graph.
col1, col2, col3 = st.columns([1, 1, 2])
with col2:
    # One container per semantic role (subjects / verbs / objects).
    upper_panel = st.container()
    middle_panel = st.container()
    lower_panel = st.container()

# Sidebar: description, condition picker, references.
with st.sidebar:
    st.title("Reddit mental map 🧠")
    st.write("This app is a mental map of Reddit posts related to:")
    st.markdown(
        """
- Attention-deficit/hyperactivity disorder (ADHD)
- Aspergers
- Depression
- Obsessive-compulsive disorder (OCD)
- Post-traumatic stress disorder (PTSD)
"""
    )
    st.write(
        "The map aims to display a glimpse of :red-background[personal point of views of people who navigate through their mental wellbeing journey]."
    )
    st.header("Update mental map ✨")
    condition = st.selectbox(
        "Select a condition", ["ADHD", "Aspergers", "Depression", "OCD", "PTSD"]
    )
    st.header("References:")
    st.markdown(
        "Hugging Face datasets: [reddit_mental_health_posts] (https://huggingface.co/datasets/solomonk/reddit_mental_health_posts)"
    )
    st.markdown(
        "Semantic role labeling code adapted from [FS Ndzomga's Medium] (https://medium.com/thoughts-on-machine-learning/building-knowledge-graphs-with-spacy-networkx-and-matplotlib-a-glimpse-into-semantic-role-e49c9dbe26b4)"
    )
# data loader
# Map each UI condition to its (subreddit name, pre-computed SRL results file).
# Replaces a repetitive if/elif chain; an unknown condition now fails loudly
# with a KeyError instead of a NameError on an unbound json_file.
_CONDITION_SOURCES = {
    "ADHD": ("ADHD", "data/adhd_clean.json"),
    "Aspergers": ("aspergers", "data/aspergers_clean.json"),
    "Depression": ("depression", "data/depression_clean.json"),
    "OCD": ("OCD", "data/ocd_clean.json"),
    "PTSD": ("ptsd", "data/ptsd_clean.json"),
}

dataset = load_dataset("solomonk/reddit_mental_health_posts")
df = dataset["train"].to_pandas()

subreddit, json_file = _CONDITION_SOURCES[condition]
df = df[df["subreddit"] == subreddit]
with open(json_file, "r") as f:  # Change by diagnosis
    srl_results = json.load(f)

# Flatten each semantic-role field across all posts into one text blob,
# ready to feed the word clouds below. Entries missing a key are skipped.
subjects = " ".join(
    value for d in srl_results if "subjects" in d for value in d["subjects"]
)
verbs = " ".join(value for d in srl_results if "verbs" in d for value in d["verbs"])
objects = " ".join(
    value for d in srl_results if "objects" in d for value in d["objects"]
)
# dataframe
with col1:
    # Hide posts whose body was removed/deleted upstream.
    keep_mask = ~df["body"].isin(["[removed]", "[deleted]"])
    body = df["body"][keep_mask]
    # Single-row selection drives the knowledge graph in col3.
    event = st.dataframe(
        body,
        hide_index=True,
        height=HEIGHT,
        use_container_width=True,
        selection_mode="single-row",
        on_select="rerun",
    )
# word cloud
# Generic, low-signal words excluded from every word cloud.
stopwords = (
    "day hour hours know month talk thing things think time try want year".split()
)
def generate_better_wordcloud(data, mask=None):
    """Render a text blob as a word-cloud matplotlib figure.

    Args:
        data: Whitespace-joined text fed to ``WordCloud.generate_from_text``.
        mask: Optional image array constraining the cloud's shape.

    Returns:
        The matplotlib ``Figure`` containing the rendered cloud.
    """
    cloud = WordCloud(
        scale=3,
        max_words=150,
        colormap="RdGy",
        mask=mask,
        background_color="white",
        stopwords=stopwords,  # module-level list defined above
        collocations=True,
    ).generate_from_text(data)
    # Use the object-oriented Axes API instead of the stateful pyplot
    # calls so the figure does not depend on pyplot's implicit
    # "current figure" global (safer across Streamlit reruns).
    fig, ax = plt.subplots()
    ax.imshow(cloud)
    ax.axis("off")
    return fig
# Render one word cloud per semantic role, stacked in the middle column.
for panel, heading, blob in (
    (upper_panel, "Subjects", subjects),
    (middle_panel, "Verbs", verbs),
    (lower_panel, "Objects", objects),
):
    with panel:
        st.subheader(heading)
        st.pyplot(generate_better_wordcloud(blob))
# network
def build_and_plot_knowledge_graph_pyvis(result):
    """Draw a knowledge graph for one post's semantic-role triples.

    Every subject is connected to every object and indirect object, with
    each edge labelled by a verb (later verbs overwrite earlier labels on
    the same edge, as in the original adaptation).

    Args:
        result: Dict with "subjects", "verbs" and "objects" lists, and
            optionally "indirect_objects".

    Returns:
        The current matplotlib ``Figure`` containing the drawn graph.
    """
    G = nx.DiGraph()
    subjects = result["subjects"]
    verbs = result["verbs"]
    objects = result["objects"]
    # Some SRL entries have no indirect objects; don't crash on the key.
    indirect_objects = result.get("indirect_objects", [])
    for subject in subjects:
        for verb in verbs:
            for obj in objects:
                G.add_edge(subject, obj, label=verb)
            for ind_obj in indirect_objects:
                G.add_edge(subject, ind_obj, label=verb)
    pos = nx.spring_layout(G, seed=42, k=0.5, iterations=50)
    nx.draw(
        G,
        pos,
        with_labels=True,
        node_color="#FF746C",
        node_size=2000,
        font_size=12,
        font_color="black",
        font_weight="normal",
        arrows=True,
    )
    edge_labels = nx.get_edge_attributes(G, "label")
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
    # NOTE(review): a pyvis Network was previously built here from G but
    # never displayed, saved, or returned — dead code, removed. Only the
    # matplotlib rendering is used by the caller.
    fig = plt.gcf()
    return fig
with col3:
    st.subheader("Mental map")
    st.write(
        "This is a 2D knowledge graph from simple :red-background[semantic role labeling] of the reddit post using spaCy, NetworkX, and Matplotlib. :red-background[Tick the box next to the row of interest to display the mental map of the individual post]. The graph shows the relationship between the subject, verb, and object at singular level, to complement the full-level overview of the word clouds. It takes a moment to load the data and if the image does not show, it is because some of the posts are deleted or removed in the original dataset."
    )
    # selection_mode="single-row" yields at most one selected row; nothing
    # is selected until the user ticks a checkbox, so guard explicitly
    # instead of swallowing the IndexError with a bare except.
    if event.selection.rows:
        person = int(event.selection.rows[0])
        plt.clf()  # clear leftover pyplot state from the previous rerun
        try:
            fign = build_and_plot_knowledge_graph_pyvis(srl_results[person])
        except (KeyError, IndexError):
            # Deleted/removed posts may lack an SRL entry — skip quietly,
            # matching the caveat in the description text above.
            pass
        else:
            st.pyplot(fign)