# Hugging Face Space: app.py (page chrome from the hosting UI, preserved as a comment)
# ando55 — "Update app.py" — rev 2514837 — raw / history / blame — 3.94 kB
import streamlit as st
import numpy as np
from pandas import DataFrame
import run_segbot
from functionforDownloadButtons import download_button
import os
import json
# Install the NEologd dictionaries for MeCab at startup (needed by run_segbot).
# Streamlit re-executes this script top-to-bottom on every user interaction, so
# guard on the clone directory: without the guard, `git clone` is re-attempted
# (and fails, short-circuiting the `&&` chain) on every rerun.
if not os.path.isdir("mecab-ipadic-neologd"):
    os.system('git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git && cd mecab-ipadic-neologd && ./bin/install-mecab-ipadic-neologd -n -y -u -p $PWD')
if not os.path.isdir("mecab-unidic-neologd"):
    os.system('git clone --depth 1 https://github.com/neologd/mecab-unidic-neologd.git && cd mecab-unidic-neologd && ./bin/install-mecab-unidic-neologd -n -y -u -p $PWD')
# Page-wide configuration; st.set_page_config must be the first Streamlit
# call in the script.
st.set_page_config(
    page_title="Clinical segment splitter",
    page_icon="๐Ÿš‘",
    layout="wide"
)
def _max_width_(max_width_px: int = 1400) -> None:
    """Widen Streamlit's main block container via injected CSS.

    Currently unused — the call site below is commented out — but kept for
    layout tweaks.

    Args:
        max_width_px: Maximum width of the content column, in pixels.
            Defaults to 1400, matching the original hard-coded value.
    """
    # The doubled braces escape literal `{`/`}` in the f-string so the CSS
    # block survives formatting.
    st.markdown(
        f"""
<style>
.reportview-container .main .block-container{{
max-width: {max_width_px}px;
}}
</style>
""",
        unsafe_allow_html=True,
    )
#_max_width_()
#c30 = st.columns([1,])
#with c30:
# st.image("logo.png", width=400)
# ---------- Page header ----------
st.title("๐Ÿš‘ Clinical segment splitter")
st.header("")

# About box, expanded by default: cites the paper this app implements and
# gives a one-line description of what the app does.
with st.expander("โ„น๏ธ - About this app", expanded=True):
    st.write(
        """
- The *Clinical segment splitter* app is an implementation of our paper.
>Kenichiro Ando, Takashi Okumura, Mamoru Komachi, Hiromasa Horiguchi, Yuji Matsumoto (2022) [Exploring optimal granularity for extractive summarization of unstructured health records: Analysis of the largest multi-institutional archive of health records in Japan.](https://doi.org/10.1371/journal.pdig.0000099) PLOS Digital Health 1(9): e000009.
- This app automatically splits Japanese sentences into smaller units representing medical meanings.
"""
    )

st.markdown("")
st.markdown("")
st.markdown("## ๐Ÿ“Œ Paste document")
@st.cache(allow_output_mutation=True)
def model_load():
    """Load the segmentation model once and reuse it across Streamlit reruns.

    Returns the ``(model, fm, index)`` triple produced by ``run_segbot.setup()``.
    NOTE(review): ``st.cache`` is deprecated in newer Streamlit releases in
    favor of ``st.cache_resource`` — confirm the version pinned for this Space.
    """
    return run_segbot.setup()

# Loaded once per session thanks to the cache; shared by all interactions.
model,fm,index = model_load()
# ---------- Input form ----------
# st.form batches widget input so the script only reruns on submit.
with st.form(key="my_form"):
    # Column layout: narrow spacers around c1 (split-method radio) and
    # c2 (text area). NOTE(review): `ce` is assigned twice — both spacer
    # columns land in the same name; harmless but the second shadows the first.
    ce, c1, ce, c2, c3 = st.columns([0.07, 1, 0.07, 5, 0.07])
    with c1:
        ModelType = st.radio(
            "Choose the method of sentence split",
            ["fullstop & linebreak (Default)", "pySBD"],
            help="""
At present, you can choose between 2 methods to split your text into sentences.
The fullstop & linebreak is naive and robust to noise, but has low accuracy.
pySBD is more accurate, but more complex and less robust to noise.
""",
        )
        # Map the UI label to the internal key passed to run_segbot.generate.
        if ModelType == "fullstop & linebreak (Default)":
            split_method="fullstop"
        else:
            split_method="pySBD"
    with c2:
        # Input document; the placeholder shows example Japanese clinical text.
        doc = st.text_area(
            "Paste your text below",
            height=510,
            placeholder="""
ใ‚ฐใƒฉใƒ ๆŸ“่‰ฒใ™ใ‚‹ใ‚‚ๆ˜Žใ‚‰ใ‹ใช่ŒใŒ่ฆ‹ใคใ‹ใ‚‰ใšใ€ ้ซ„ๆถฒๅŸน้คŠใงใ‚‚ๅ„ชไฝใช่ŒใฏๅŸน้คŠใ•ใ‚Œใชใ‹ใฃใŸใ€‚
็ดฐ่Œๆ€ง้ซ„่†œ็‚Žใซๅฏพใ™ใ‚‹ใ‚ฐใƒฉใƒ ๆŸ“่‰ฒใฎๆ„Ÿๅบฆใฏ60%็จ‹ๅบฆใงใ‚ใ‚Šใ€ๅŸน้คŠใซ้–ขใ—ใฆใ‚‚ๆ„Ÿๅบฆใฏ้ซ˜ใใชใ„ใ€‚
ใพใŸ้ซ„ๆถฒไธญใฎ็ณ–ใฏใ‚‚ใ†ๅฐ‘ใ—ๆธ›ใ‚‹ใฎใงใฏใชใ„ใ ใ‚ใ†ใ‹ใ€‚็ขบๅฎš่จบๆ–ญใฏใคใ‹ใชใ„ใ‚‚ใฎใฎใ€ๆœ€ใ‚‚็–‘ใ‚ใ—ใ„็–พๆ‚ฃใงใ‚ใฃใŸใ€‚
่ตทๅ› ่ŒใฏMRSA,่…ธๅ†…็ดฐ่Œ็ญ‰ใ‚’ๅบƒๅŸŸใซใ‚ซใƒใƒผใ™ใ‚‹ใŸใ‚ใƒใƒณใ‚ณใƒžใ‚คใ‚ทใƒณ,ใƒกใƒญใƒšใƒใƒ (้ซ„่†œ็‚Ždose)ใจใ—ใŸใ€‚
""",
        )
    submit_button = st.form_submit_button(label="Go to split ๐Ÿ‘")

# Halt the script here until the user submits the form; everything below
# runs only on (and after) submission.
if not submit_button:
    st.stop()
# ---------- Results ----------
# Run the segmenter on the submitted document; yields a sequence of segments.
keywords = run_segbot.generate(doc, model, fm, index, split_method)

st.markdown("## Results")
st.header("")

# Layout: three download buttons centered between wide spacer columns.
cs, c1, c2, c3, cLast = st.columns([2, 1.5, 1.5, 1.5, 2])
st.header("")

# One segment per row, 1-based index for display.
df = DataFrame(keywords)
df.index += 1
df.columns = ['Segment']

# Render the download buttons (return values were assigned to an unused
# variable before; the calls are kept, the dead assignments and the debug
# `print(df)` are removed).
with c1:
    download_button(keywords, "Data.csv", "๐Ÿ“ฅ Download (.csv)")
with c2:
    download_button(keywords, "Data.txt", "๐Ÿ“ฅ Download (.txt)")
with c3:
    download_button(keywords, "Data.json", "๐Ÿ“ฅ Download (.json)")

st.table(df)