Spaces:

ando55
/

clinical_segment_splitter

Runtime error

File size: 3,944 Bytes

import streamlit as st
import numpy as np
from pandas import DataFrame
import run_segbot
from functionforDownloadButtons import download_button
import os
import json

def _install_neologd(dictionary):
    """Clone and install one NEologd dictionary into the working directory.

    Streamlit re-executes this script from the top on every user
    interaction, so the install is skipped when the checkout directory
    already exists instead of spawning a shell whose ``git clone`` would
    fail on the existing path.

    Args:
        dictionary: Dictionary flavour used in the repository name, i.e.
            ``"ipadic"`` or ``"unidic"``.
    """
    repo = f"mecab-{dictionary}-neologd"
    if os.path.isdir(repo):
        # Already cloned by an earlier run. The original command chain would
        # also have stopped here, because `git clone` fails on an existing
        # directory and `&&` short-circuits the rest.
        return
    status = os.system(
        f'git clone --depth 1 https://github.com/neologd/{repo}.git'
        f' && cd {repo} && ./bin/install-{repo} -n -y -u -p $PWD'
    )
    if status != 0:
        # Best effort: keep the app starting, but do not hide the failure.
        print(f"WARNING: installing {repo} exited with status {status}")


for _dictionary in ("ipadic", "unidic"):
    _install_neologd(_dictionary)



# Page-level settings: browser-tab title and icon, and the wide layout.
_PAGE_SETTINGS = {
    "page_title": "Clinical segment splitter",
    "page_icon": "🚑",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)


def _max_width_(max_width_px=1400):
    """Inject a CSS rule that caps the width of the main content container.

    Args:
        max_width_px: Maximum width in CSS pixels. Defaults to 1400, the
            value that used to be hard-coded.
    """
    # NOTE(review): the rule targets `.reportview-container`; confirm that
    # this class exists in the deployed Streamlit version.
    max_width_str = f"max-width: {max_width_px}px;"
    st.markdown(
        f"""
    <style>
    .reportview-container .main .block-container{{
        {max_width_str}
    }}
    </style>    
    """,
        unsafe_allow_html=True,
    )


#_max_width_()

#c30 = st.columns([1,])

#with c30:
# st.image("logo.png", width=400)
# Page title, followed by an empty header used as a vertical spacer.
st.title("🚑 Clinical segment splitter")
st.header("")



# "About" panel, expanded by default, that cites the paper the app implements.
# The article number in the citation is e0000099, matching the DOI suffix.
with st.expander("ℹ️ - About this app", expanded=True):

    st.write(
        """     
-   The *Clinical segment splitter* app is an implementation of our paper.
    >Kenichiro Ando, Takashi Okumura, Mamoru Komachi, Hiromasa Horiguchi, Yuji Matsumoto (2022) [Exploring optimal granularity for extractive summarization of unstructured health records: Analysis of the largest multi-institutional archive of health records in Japan.](https://doi.org/10.1371/journal.pdig.0000099) PLOS Digital Health 1(9): e0000099.
-   This app automatically splits Japanese sentences into smaller units representing medical meanings.
	    """
    )

    st.markdown("")

st.markdown("")
st.markdown("## 📌 Paste document")

# `st.cache(allow_output_mutation=True)` is deprecated in newer Streamlit
# releases in favour of `st.cache_resource`. Use the new decorator when the
# installed version provides it and fall back to the legacy one otherwise, so
# the script runs on both.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def model_load():
    """Return the result of ``run_segbot.setup()``, cached across reruns.

    The result is unpacked below into three objects (model, fm, index),
    which are later handed to ``run_segbot.generate``.
    """
    return run_segbot.setup()


model, fm, index = model_load()
# Input form: a radio button picks the sentence splitter, a text area takes
# the document, and the submit button triggers the split.
with st.form(key="my_form"):
    # The three narrow columns are gutters; widgets go in the 2nd and 4th.
    gutter_left, c1, gutter_mid, c2, gutter_right = st.columns(
        [0.07, 1, 0.07, 5, 0.07]
    )

    with c1:
        chosen_label = st.radio(
            "Choose the method of sentence split",
            ["fullstop & linebreak (Default)", "pySBD"],
            help="""
            At present, you can choose between 2 methods to split your text into sentences. 

            The fullstop & linebreak is naive and robust to noise, but has low accuracy.
            pySBD is more accurate, but more complex and less robust to noise.
            """,
        )

        # Map the radio label to the identifier given to run_segbot.generate.
        split_method = (
            "fullstop"
            if chosen_label == "fullstop & linebreak (Default)"
            else "pySBD"
        )

    with c2:
        doc = st.text_area(
            "Paste your text below",
            height=510,
            placeholder="""
            グラム染色するも明らかな菌が見つからず、 髄液培養でも優位な菌は培養されなかった。
            細菌性髄膜炎に対するグラム染色の感度は60%程度であり、培養に関しても感度は高くない。
            また髄液中の糖はもう少し減るのではないだろうか。確定診断はつかないものの、最も疑わしい疾患であった。
            起因菌はMRSA,腸内細菌等を広域にカバーするためバンコマイシン,メロペネム(髄膜炎dose)とした。
            """,
        )

        submit_button = st.form_submit_button(label="Go to split 👍")


# Nothing below this point runs until the form has been submitted.
if not submit_button:
    st.stop()

# Guard against an empty or whitespace-only submission: there is nothing to
# split, so tell the user instead of passing blank text to the model.
if not doc or not doc.strip():
    st.warning("Please paste some text to split.")
    st.stop()

# Split the pasted document into segments with the cached model objects.
keywords = run_segbot.generate(doc, model, fm, index, split_method)


st.markdown("## Results")

st.header("")

# Five columns: the outer two are spacers, the inner three hold the buttons.
cs, c1, c2, c3, cLast = st.columns([2, 1.5, 1.5, 1.5, 2])

st.header("")

# Build a one-column table of segments, numbered from 1.
df = DataFrame(keywords)
if df.shape[1] == 0:
    # No segments were produced. Assigning one column label to a frame with
    # zero columns raises ValueError, so build the empty table explicitly.
    df = DataFrame(columns=['Segment'])
else:
    df.index += 1
    df.columns = ['Segment']
# The table is deliberately not printed to stdout: it holds user-pasted text
# that should not end up in the server log.

# All three buttons receive the same `keywords` object; only the file name
# and the label differ.
with c1:
    CSVButton2 = download_button(keywords, "Data.csv", "📥 Download (.csv)")
with c2:
    CSVButton2 = download_button(keywords, "Data.txt", "📥 Download (.txt)")
with c3:
    CSVButton2 = download_button(keywords, "Data.json", "📥 Download (.json)")

st.table(df)