"""Streamlit front end for the clinical segment splitter.

The script installs the NEologd MeCab dictionaries on first run, loads the
segmentation model once, lets the user paste a document and choose a
sentence-splitting method, and then shows the resulting segments as a table
with download buttons.

NOTE(review): several non-ASCII string literals below (page icon, emoji in
titles/buttons, the Japanese placeholder text) look mis-encoded (UTF-8 that
was decoded with a legacy code page).  They are reproduced as found; restore
them from a clean copy of the file.  Line breaks inside the multi-line
Markdown/help strings may also have been lost -- verify against the original.
"""

import json
import logging
import os

import numpy as np
import streamlit as st
from pandas import DataFrame

import run_segbot
from functionforDownloadButtons import download_button

logger = logging.getLogger(__name__)

# Label of the default radio option; also used to map the choice to the
# ``split_method`` value passed to ``run_segbot.generate``.
DEFAULT_SPLITTER_LABEL = "fullstop & linebreak (Default)"


def _install_neologd(dictionary):
    """Clone and install one NEologd dictionary unless it is already checked out.

    Streamlit re-executes this script on every user interaction, so the
    install must not be attempted again once the checkout exists.  The skip
    condition (directory exists and is non-empty) is exactly the condition
    under which ``git clone`` would refuse to run anyway.

    Args:
        dictionary: Repository/directory name, e.g. ``"mecab-ipadic-neologd"``.
            The installer script is expected at ``./bin/install-<dictionary>``
            inside the checkout.
    """
    if os.path.isdir(dictionary) and os.listdir(dictionary):
        return
    status = os.system(
        f"git clone --depth 1 https://github.com/neologd/{dictionary}.git"
        f" && cd {dictionary}"
        f" && ./bin/install-{dictionary} -n -y -u -p $PWD"
    )
    if status != 0:
        # Best effort, as in the original: keep going, but leave a trace so a
        # later tokenizer failure can be diagnosed.
        logger.warning("Installing %s failed with status %s", dictionary, status)


_install_neologd("mecab-ipadic-neologd")
_install_neologd("mecab-unidic-neologd")

st.set_page_config(
    page_title="Clinical segment splitter",
    page_icon="๐Ÿš‘",
    layout="wide",
)


def _max_width_():
    """Emit a Markdown block intended to carry a max-width style override.

    Currently unused (the call below is commented out).

    NOTE(review): ``max_width_str`` is not interpolated into the markup, and
    the markup itself is blank in the visible source -- the CSS was probably
    lost; confirm against the original file before re-enabling.
    """
    max_width_str = "max-width: 1400px;"
    st.markdown(
        " ",
        unsafe_allow_html=True,
    )


# Disabled optional page decorations, kept for reference:
# _max_width_()
# c30 = st.columns([1,])
# with c30:
#     st.image("logo.png", width=400)

st.title("๐Ÿš‘ Clinical segment splitter")
st.header("")

with st.expander("โ„น๏ธ - About this app", expanded=True):
    st.write(
        " - The *Clinical segment splitter* app is an implementation of our paper. "
        ">Kenichiro Ando, Takashi Okumura, Mamoru Komachi, Hiromasa Horiguchi, Yuji Matsumoto (2022) "
        "[Exploring optimal granularity for extractive summarization of unstructured health records: "
        "Analysis of the largest multi-institutional archive of health records in Japan.]"
        "(https://doi.org/10.1371/journal.pdig.0000099) PLOS Digital Health 1(9): e000009. "
        "- This app automatically splits Japanese sentences into smaller units representing medical meanings. "
    )
    st.markdown("")

st.markdown("")
st.markdown("## ๐Ÿ“Œ Paste document")

# ``st.cache`` is the legacy caching decorator; newer Streamlit releases
# provide ``st.cache_resource`` for shared, uncopied objects such as models.
# Fall back to the legacy form only when the new API is not available.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def model_load():
    """Return the result of ``run_segbot.setup()``, cached across reruns.

    The caller unpacks the result into three values (model, fm, index).
    """
    return run_segbot.setup()


model, fm, index = model_load()

with st.form(key="my_form"):
    # Columns 1, 3 and 5 are narrow spacers.
    _, c1, _, c2, _ = st.columns([0.07, 1, 0.07, 5, 0.07])

    with c1:
        splitter_choice = st.radio(
            "Choose the method of sentence split",
            [DEFAULT_SPLITTER_LABEL, "pySBD"],
            help=(
                " At present, you can choose between 2 methods to split your text into sentences. "
                "The fullstop & linebreak is naive and robust to noise, but has low accuracy. \n"
                "pySBD is more accurate, but more complex and less robust to noise. "
            ),
        )
        split_method = (
            "fullstop" if splitter_choice == DEFAULT_SPLITTER_LABEL else "pySBD"
        )

    with c2:
        doc = st.text_area(
            "Paste your text below",
            height=510,
            placeholder=""" ใ‚ฐใƒฉใƒ ๆŸ“่‰ฒใ™ใ‚‹ใ‚‚ๆ˜Žใ‚‰ใ‹ใช่ŒใŒ่ฆ‹ใคใ‹ใ‚‰ใšใ€ ้ซ„ๆถฒๅŸน้คŠใงใ‚‚ๅ„ชไฝใช่ŒใฏๅŸน้คŠใ•ใ‚Œใชใ‹ใฃใŸใ€‚ ็ดฐ่Œๆ€ง้ซ„่†œ็‚Žใซๅฏพใ™ใ‚‹ใ‚ฐใƒฉใƒ ๆŸ“่‰ฒใฎๆ„Ÿๅบฆใฏ60%็จ‹ๅบฆใงใ‚ใ‚Šใ€ๅŸน้คŠใซ้–ขใ—ใฆใ‚‚ๆ„Ÿๅบฆใฏ้ซ˜ใใชใ„ใ€‚ ใพใŸ้ซ„ๆถฒไธญใฎ็ณ–ใฏใ‚‚ใ†ๅฐ‘ใ—ๆธ›ใ‚‹ใฎใงใฏใชใ„ใ ใ‚ใ†ใ‹ใ€‚็ขบๅฎš่จบๆ–ญใฏใคใ‹ใชใ„ใ‚‚ใฎใฎใ€ๆœ€ใ‚‚็–‘ใ‚ใ—ใ„็–พๆ‚ฃใงใ‚ใฃใŸใ€‚ ่ตทๅ› ่ŒใฏMRSA,่…ธๅ†…็ดฐ่Œ็ญ‰ใ‚’ๅบƒๅŸŸใซใ‚ซใƒใƒผใ™ใ‚‹ใŸใ‚ใƒใƒณใ‚ณใƒžใ‚คใ‚ทใƒณ,ใƒกใƒญใƒšใƒใƒ (้ซ„่†œ็‚Ždose)ใจใ—ใŸใ€‚ """,
        )
        submit_button = st.form_submit_button(label="Go to split ๐Ÿ‘")

# Nothing to show until the form has been submitted.
if not submit_button:
    st.stop()

keywords = run_segbot.generate(doc, model, fm, index, split_method)

st.markdown("## Results")
st.header("")

# Outer columns are spacers that centre the three download buttons.
_, c1, c2, c3, _ = st.columns([2, 1.5, 1.5, 1.5, 2])

st.header("")

# One row per segment, numbered from 1 for display.
df = DataFrame(keywords)
df.index += 1
df.columns = ["Segment"]

# The helper's return value was never used by this script, so it is called
# only for its effect.
with c1:
    download_button(keywords, "Data.csv", "๐Ÿ“ฅ Download (.csv)")
with c2:
    download_button(keywords, "Data.txt", "๐Ÿ“ฅ Download (.txt)")
with c3:
    download_button(keywords, "Data.json", "๐Ÿ“ฅ Download (.json)")

st.table(df)