File size: 3,944 Bytes
46a030d
 
 
 
 
 
 
926183f
75e63cc
02ccaba
 
 
da90a83
46a030d
5dc1676
46a030d
 
 
926183f
 
46a030d
 
 
 
 
 
 
 
 
 
 
 
926183f
 
46a030d
 
 
 
 
 
5dc1676
46a030d
 
 
 
 
 
 
 
6ffd015
 
 
46a030d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4295a5f
 
 
 
 
2514837
46a030d
 
2514837
46a030d
 
 
 
 
 
 
 
4295a5f
46a030d
 
 
 
 
 
 
 
 
 
 
 
 
4295a5f
 
 
 
 
 
 
46a030d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import streamlit as st
import numpy as np
from pandas import DataFrame
import run_segbot
from functionforDownloadButtons import download_button
import os
import json

# Streamlit re-executes this script from the top on every user interaction,
# so the dictionary installation must be cheap when it has already been done.
def _install_neologd(repo):
    """Clone and install one NEologd dictionary repository if it is missing.

    The shell command is the same one that used to run unconditionally:
    shallow-clone ``https://github.com/neologd/<repo>.git`` into the current
    working directory, ``cd`` into it and run ``./bin/install-<repo>`` with
    ``-n -y -u -p $PWD``.

    Args:
        repo: Repository name, e.g. ``"mecab-ipadic-neologd"``.
    """
    # ``git clone`` refuses to clone into an existing, non-empty directory and
    # the ``&&`` chain then skips the installer anyway, so an existing
    # checkout means there is nothing to do; avoid spawning the shell at all.
    if os.path.isdir(repo):
        return

    status = os.system(
        f'git clone --depth 1 https://github.com/neologd/{repo}.git'
        f' && cd {repo} && ./bin/install-{repo} -n -y -u -p $PWD'
    )
    if status != 0:
        # Stay best-effort (the app still starts) but make the failure
        # visible instead of silently ignoring the exit status.
        import warnings

        warnings.warn(f"Installing {repo} failed (exit status {status}).")


_install_neologd('mecab-ipadic-neologd')
_install_neologd('mecab-unidic-neologd')



# Page-level settings. Streamlit expects set_page_config to be the first
# Streamlit command executed in a script run, so keep this above every other
# st.* call.
st.set_page_config(
    page_title="Clinical segment splitter",
    # The icon was stored mis-encoded (UTF-8 bytes F0 9F 9A 91 rendered as
    # Thai/cp1252 characters); restored to the intended ambulance emoji.
    page_icon="🚑",
    layout="wide",
)


def _max_width_(max_width_px: int = 1400) -> None:
    """Inject a CSS rule that caps the width of the main content container.

    The rule targets ``.reportview-container .main .block-container`` and is
    emitted through ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        max_width_px: Maximum width in pixels. Defaults to 1400, the value
            that was previously hard-coded, so existing zero-argument calls
            produce exactly the same markup.
    """
    # NOTE(review): the selector is tied to Streamlit's generated DOM class
    # names; confirm it still matches the Streamlit version in use.
    max_width_str = f"max-width: {max_width_px}px;"
    st.markdown(
        f"""
    <style>
    .reportview-container .main .block-container{{
        {max_width_str}
    }}
    </style>    
    """,
        unsafe_allow_html=True,
    )


#_max_width_()

#c30 = st.columns([1,])

#with c30:
# st.image("logo.png", width=400)
# Page title. The emoji in this section were stored mis-encoded (UTF-8 bytes
# rendered as Thai/cp1252 characters) and are restored here.
st.title("🚑 Clinical segment splitter")
st.header("")  # empty header, used only as vertical spacing

# Collapsible "about" box, open by default, describing the app and citing the
# paper it implements.
with st.expander("ℹ️ - About this app", expanded=True):

    # The article number now matches the DOI suffix on the same line
    # (journal.pdig.0000099); it previously read "e000009".
    st.write(
        """     
-   The *Clinical segment splitter* app is an implementation of our paper.
    >Kenichiro Ando, Takashi Okumura, Mamoru Komachi, Hiromasa Horiguchi, Yuji Matsumoto (2022) [Exploring optimal granularity for extractive summarization of unstructured health records: Analysis of the largest multi-institutional archive of health records in Japan.](https://doi.org/10.1371/journal.pdig.0000099) PLOS Digital Health 1(9): e0000099.
-   This app automatically splits Japanese sentences into smaller units representing medical meanings.
	    """
    )

    st.markdown("")

st.markdown("")
st.markdown("## 📌 Paste document")
def model_load():
    """Return whatever ``run_segbot.setup()`` builds.

    The call site below unpacks the result into three values
    (``model``, ``fm``, ``index``). The function is wrapped in a Streamlit
    cache right after its definition so that the setup work is not repeated
    on every script re-run.
    """
    return run_segbot.setup()


# ``st.cache`` is deprecated in recent Streamlit releases; ``st.cache_resource``
# is its documented replacement for shared, non-copied objects (the role
# ``allow_output_mutation=True`` played). Fall back to the legacy decorator so
# the app keeps working on older Streamlit versions that lack the new API.
if hasattr(st, "cache_resource"):
    model_load = st.cache_resource(model_load)
else:
    model_load = st.cache(allow_output_mutation=True)(model_load)

model, fm, index = model_load()
# Input form: a radio selector for the sentence-splitting method next to a
# large text area, submitted with a single button. After this block the script
# has `split_method`, `doc` and `submit_button` available at module level.
# NOTE(review): the placeholder text and the button label below look
# mis-encoded; they are reproduced unchanged here - confirm against the
# original file before repairing them.
with st.form(key="my_form"):
    # Five columns: thin 0.07-wide gutters around a narrow options column
    # (weight 1) and a wide text column (weight 5).
    _, c1, _, c2, _ = st.columns([0.07, 1, 0.07, 5, 0.07])

    with c1:
        chosen_label = st.radio(
            "Choose the method of sentence split",
            ["fullstop & linebreak (Default)", "pySBD"],
            help="""
            At present, you can choose between 2 methods to split your text into sentences. 

            The fullstop & linebreak is naive and robust to noise, but has low accuracy.
            pySBD is more accurate, but more complex and less robust to noise.
            """,
        )

        # Translate the displayed label into the identifier that is later
        # handed to run_segbot.generate.
        split_method = (
            "fullstop"
            if chosen_label == "fullstop & linebreak (Default)"
            else "pySBD"
        )

    with c2:
        doc = st.text_area(
            "Paste your text below",
            height=510,
            placeholder="""
            ใ‚ฐใƒฉใƒ ๆŸ“่‰ฒใ™ใ‚‹ใ‚‚ๆ˜Žใ‚‰ใ‹ใช่ŒใŒ่ฆ‹ใคใ‹ใ‚‰ใšใ€ ้ซ„ๆถฒๅŸน้คŠใงใ‚‚ๅ„ชไฝใช่ŒใฏๅŸน้คŠใ•ใ‚Œใชใ‹ใฃใŸใ€‚
            ็ดฐ่Œๆ€ง้ซ„่†œ็‚Žใซๅฏพใ™ใ‚‹ใ‚ฐใƒฉใƒ ๆŸ“่‰ฒใฎๆ„Ÿๅบฆใฏ60%็จ‹ๅบฆใงใ‚ใ‚Šใ€ๅŸน้คŠใซ้–ขใ—ใฆใ‚‚ๆ„Ÿๅบฆใฏ้ซ˜ใใชใ„ใ€‚
            ใพใŸ้ซ„ๆถฒไธญใฎ็ณ–ใฏใ‚‚ใ†ๅฐ‘ใ—ๆธ›ใ‚‹ใฎใงใฏใชใ„ใ ใ‚ใ†ใ‹ใ€‚็ขบๅฎš่จบๆ–ญใฏใคใ‹ใชใ„ใ‚‚ใฎใฎใ€ๆœ€ใ‚‚็–‘ใ‚ใ—ใ„็–พๆ‚ฃใงใ‚ใฃใŸใ€‚
            ่ตทๅ› ่ŒใฏMRSA,่…ธๅ†…็ดฐ่Œ็ญ‰ใ‚’ๅบƒๅŸŸใซใ‚ซใƒใƒผใ™ใ‚‹ใŸใ‚ใƒใƒณใ‚ณใƒžใ‚คใ‚ทใƒณ,ใƒกใƒญใƒšใƒใƒ (้ซ„่†œ็‚Ždose)ใจใ—ใŸใ€‚
            """,
        )

        submit_button = st.form_submit_button(label="Go to split ๐Ÿ‘")


# Everything below only runs for a script execution in which the form was
# submitted; otherwise rendering stops here.
if not submit_button:
    st.stop()

# Nothing to segment: tell the user instead of calling the model on an empty
# or whitespace-only document.
if not doc.strip():
    st.warning("Please paste some text before splitting.")
    st.stop()

keywords = run_segbot.generate(doc, model, fm, index, split_method)

# One row per returned item, numbered from 1 for display.
df = DataFrame(keywords)
if df.empty:
    # Assigning the single column name below would raise on an empty frame
    # (it has zero columns), so report the empty result explicitly.
    st.info("No segments were produced for this text.")
    st.stop()
df.index += 1
df.columns = ["Segment"]
# The former debug print of the frame was removed: it wrote the user's pasted
# document to the server's stdout.

st.markdown("## Results")

st.header("")  # vertical spacing

# Three download buttons centred between two wider spacer columns.
_, c1, c2, c3, _ = st.columns([2, 1.5, 1.5, 1.5, 2])

st.header("")  # vertical spacing

# NOTE(review): all three buttons hand the same `keywords` object to the
# helper; whether it serialises differently per file extension is not visible
# from this file - confirm against functionforDownloadButtons.download_button.
# The helper's return value was previously bound to an unused name and is
# intentionally ignored. The label emoji were mis-encoded and are restored.
with c1:
    download_button(keywords, "Data.csv", "📥 Download (.csv)")
with c2:
    download_button(keywords, "Data.txt", "📥 Download (.txt)")
with c3:
    download_button(keywords, "Data.json", "📥 Download (.json)")

st.table(df)