Spaces:
Runtime error
Runtime error
import streamlit as st | |
import numpy as np | |
from pandas import DataFrame | |
import run_segbot | |
from functionforDownloadButtons import download_button | |
import os | |
import json | |
# Clone and install the NEologd dictionaries (ipadic and unidic variants),
# passing each checkout directory itself as the installer's `-p` argument.
#
# Streamlit re-executes this script from the top on every interaction, so a
# dictionary whose checkout directory already exists is skipped rather than
# spawning a shell in which `git clone` would fail and short-circuit the
# rest of the `&&` chain anyway.
for _neologd_repo in ("mecab-ipadic-neologd", "mecab-unidic-neologd"):
    if os.path.isdir(_neologd_repo):
        continue
    # `$PWD` is expanded by the shell after the `cd`, i.e. inside the checkout.
    os.system(
        f"git clone --depth 1 https://github.com/neologd/{_neologd_repo}.git"
        f" && cd {_neologd_repo}"
        f" && ./bin/install-{_neologd_repo} -n -y -u -p $PWD"
    )
# Browser-tab title, icon and layout for the page.  Issued before any other
# st.* call in this script.
_PAGE_SETTINGS = {
    "page_title": "Clinical segment splitter",
    "page_icon": "๐",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
def _max_width_(max_width_px: int = 1400) -> None:
    """Inject a CSS rule that caps the width of the main content container.

    The rule targets ``.reportview-container .main .block-container`` and is
    emitted through ``st.markdown`` with ``unsafe_allow_html=True`` so the
    ``<style>`` element reaches the page as HTML.

    Args:
        max_width_px: Maximum container width in pixels.  Defaults to 1400,
            the value that used to be hard-coded.

    Note:
        The only call to this helper visible in this file is commented out.
    """
    # Built from explicit lines so the emitted CSS does not depend on how this
    # function happens to be indented in the source file.
    css = "\n".join(
        (
            "<style>",
            ".reportview-container .main .block-container{",
            f"max-width: {max_width_px}px;",
            "}",
            "</style>",
        )
    )
    st.markdown(css, unsafe_allow_html=True)
#_max_width_() | |
#c30 = st.columns([1,]) | |
#with c30: | |
# st.image("logo.png", width=400) | |
# NOTE(review): the icon characters in the labels below look mis-encoded;
# they are left untouched here — confirm the intended emoji and restore them.

# Page title, followed by an empty header used only as vertical spacing.
st.title("๐ Clinical segment splitter")
st.header("")

# Markdown for the "About" expander.  Written as explicit lines so the
# rendered text does not depend on source indentation.  The article number
# at the end of the citation now matches the DOI on the same line
# (e0000099; it previously read e000009).
_ABOUT_MARKDOWN = (
    "\n"
    "- The *Clinical segment splitter* app is an implementation of our paper.\n"
    ">Kenichiro Ando, Takashi Okumura, Mamoru Komachi, Hiromasa Horiguchi, "
    "Yuji Matsumoto (2022) "
    "[Exploring optimal granularity for extractive summarization of "
    "unstructured health records: Analysis of the largest multi-institutional "
    "archive of health records in Japan.]"
    "(https://doi.org/10.1371/journal.pdig.0000099) "
    "PLOS Digital Health 1(9): e0000099.\n"
    "- This app automatically splits Japanese sentences into smaller units "
    "representing medical meanings.\n"
)

with st.expander("โน๏ธ - About this app", expanded=True):
    st.write(_ABOUT_MARKDOWN)
    st.markdown("")

# Spacer, then the heading of the input section.
st.markdown("")
st.markdown("## ๐ Paste document")
def model_load():
    """Return the segmentation resources produced by ``run_segbot.setup()``.

    Returns:
        Whatever ``run_segbot.setup()`` returns, unchanged.  The caller in
        this script unpacks it into three values (``model, fm, index``).
    """
    # NOTE(review): nothing here caches the result, so each script run calls
    # setup() again — confirm whether that is intended.
    resources = run_segbot.setup()
    return resources
# Resources consumed by run_segbot.generate() once the form is submitted.
model, fm, index = model_load()

# Labels offered by the sentence-split selector; the first one is the default.
_SPLIT_LABELS = ["fullstop & linebreak (Default)", "pySBD"]

# Tooltip text for the selector.
_SPLIT_HELP = """
At present, you can choose between 2 methods to split your text into sentences.
The fullstop & linebreak is naive and robust to noise, but has low accuracy.
pySBD is more accurate, but more complex and less robust to noise.
"""

# Example text shown in the empty text area.  Defined at module level so the
# lines carry no leading whitespace from code indentation.
_DOC_PLACEHOLDER = """
ใฐใฉใ ๆ่ฒใใใๆใใใช่ใ่ฆใคใใใใ ้ซๆถฒๅน้คใงใๅชไฝใช่ใฏๅน้คใใใชใใฃใใ
็ดฐ่ๆง้ซ่็ใซๅฏพใใใฐใฉใ ๆ่ฒใฎๆๅบฆใฏ60%็จๅบฆใงใใใๅน้คใซ้ขใใฆใๆๅบฆใฏ้ซใใชใใ
ใพใ้ซๆถฒไธญใฎ็ณใฏใใๅฐใๆธใใฎใงใฏใชใใ ใใใใ็ขบๅฎ่จบๆญใฏใคใใชใใใฎใฎใๆใ็ใใใ็พๆฃใงใใฃใใ
่ตทๅ ่ใฏMRSA,่ ธๅ ็ดฐ่็ญใๅบๅใซใซใใผใใใใใณใณใใคใทใณ,ใกใญใใใ (้ซ่็dose)ใจใใใ
"""

with st.form(key="my_form"):
    # Five columns: narrow spacers around the option column and the text column.
    _, options_col, _, text_col, _ = st.columns([0.07, 1, 0.07, 5, 0.07])

    with options_col:
        chosen_label = st.radio(
            "Choose the method of sentence split",
            _SPLIT_LABELS,
            help=_SPLIT_HELP,
        )
    # Any label other than the default one selects pySBD.
    split_method = "fullstop" if chosen_label == _SPLIT_LABELS[0] else "pySBD"

    with text_col:
        doc = st.text_area(
            "Paste your text below",
            height=510,
            placeholder=_DOC_PLACEHOLDER,
        )
        submit_button = st.form_submit_button(label="Go to split ๐")

# Nothing below this point runs until the form has been submitted.
if not submit_button:
    st.stop()
# Split the submitted document into segments.
keywords = run_segbot.generate(doc, model, fm, index, split_method)

st.markdown("## Results")
st.header("")

# Three button columns flanked by two spacer columns; the empty header that
# follows is only vertical spacing.
_, csv_col, txt_col, json_col, _ = st.columns([2, 1.5, 1.5, 1.5, 2])
st.header("")

# One-column table of segments, numbered from 1.  An empty result has no
# columns to rename (assigning a one-element column list would raise), so it
# gets an empty frame that already carries the expected header.
# The table is deliberately not printed: it may contain pasted clinical text
# that should not end up in the server's stdout log.
df = DataFrame(keywords)
if df.empty:
    df = DataFrame(columns=["Segment"])
else:
    df.index += 1
    df.columns = ["Segment"]

# One download button per format; every button receives the same `keywords`
# object and only the file name and label differ.
for _button_col, _extension in (
    (csv_col, "csv"),
    (txt_col, "txt"),
    (json_col, "json"),
):
    with _button_col:
        download_button(
            keywords,
            f"Data.{_extension}",
            f"๐ฅ Download (.{_extension})",
        )

st.table(df)