Spaces:

ando55
/

clinical_segment_splitter

Runtime error

File size: 3,294 Bytes

import streamlit as st
import numpy as np
from pandas import DataFrame
import run_segbot
from functionforDownloadButtons import download_button
import os
import json

# One-time setup of the two NEologd dictionaries.
# Each entry pairs the directory that `git clone` creates with the shell
# pipeline that clones the repository and runs its installer in place.
# The command strings are kept exactly as they were.
_NEOLOGD_SETUP = (
    (
        'mecab-ipadic-neologd',
        'git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git && cd mecab-ipadic-neologd && ./bin/install-mecab-ipadic-neologd -n -y -u -p $PWD',
    ),
    (
        'mecab-unidic-neologd',
        'git clone --depth 1 https://github.com/neologd/mecab-unidic-neologd.git && cd mecab-unidic-neologd && ./bin/install-mecab-unidic-neologd -n -y -u -p $PWD',
    ),
)

for _checkout_dir, _setup_command in _NEOLOGD_SETUP:
    # Streamlit re-executes this script from the top on every interaction.
    # When the checkout already exists and is non-empty, `git clone` aborts
    # and the `&&` chain stops, so spawning the shell again achieves nothing.
    if os.path.isdir(_checkout_dir) and os.listdir(_checkout_dir):
        continue
    _exit_status = os.system(_setup_command)
    if _exit_status != 0:
        # Surface the failure in the logs instead of discarding the status.
        print(f"[setup] command for '{_checkout_dir}' exited with status {_exit_status}")


import glob

# Log every path named 'mecab-ipadic-neologd' found under the working directory.
for name in glob.glob('./**/mecab-ipadic-neologd', recursive=True):
    print(name)

# Page-level settings: browser-tab title and icon, plus the wide layout.
_page_options = {
    "page_title": "Clinical segment generater",
    "page_icon": "🚑",
    "layout": "wide",
}
st.set_page_config(**_page_options)


def _max_width_(max_width_px=1400):
    """Inject a CSS rule that caps the width of the main block container.

    Args:
        max_width_px: Maximum width in pixels. Defaults to 1400, the value
            that was previously hard-coded.
    """
    max_width_str = f"max-width: {max_width_px}px;"
    # Doubled braces below are literal CSS braces inside the f-string.
    # NOTE(review): the `.reportview-container` selector has to match the
    # class names emitted by the installed Streamlit version; confirm.
    st.markdown(
        f"""
    <style>
    .reportview-container .main .block-container{{
        {max_width_str}
    }}
    </style>    
    """,
        unsafe_allow_html=True,
    )


#_max_width_()

#c30 = st.columns([1,])

#with c30:
# st.image("logo.png", width=400)
# Page heading, followed by an empty header (presumably a vertical spacer).
st.title("🚑 Clinical segment generater")
st.header("")

# Markdown body of the "About" panel; kept verbatim.
about_text = """     
-   The *Clinical segment generater* app is an implementation of [our paper](https://journals.plos.org/digitalhealth/article?id=10.1371/journal.pdig.0000099).
-   It automatically splits Japanese sentences into smaller units representing medical meanings.
	    """

# Description panel, expanded when the page first renders.
about_panel = st.expander("ℹ️ - About this app", expanded=True)
with about_panel:
    st.write(about_text)
    st.markdown("")

st.markdown("")
st.markdown("## 📌 Paste document")
# `st.cache` is the deprecated caching API. `st.cache_resource` is the
# documented replacement for `st.cache(allow_output_mutation=True)`.
# Fall back to the legacy decorator on Streamlit versions that predate it.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def model_load():
    """Run ``run_segbot.setup()`` once and reuse its result across reruns.

    Returns:
        Whatever ``run_segbot.setup()`` returns; the statement below unpacks
        it into three values.
    """
    return run_segbot.setup()


model, fm, index = model_load()
# Input form: a radio selector for the sentence-splitting method next to a
# text area for the document. `split_method`, `doc` and `submit_button` are
# read by the code that follows the form.
with st.form(key="my_form"):
    # The three 0.07-wide columns stay empty (presumably acting as margins).
    left_pad, options_col, mid_pad, text_col, right_pad = st.columns(
        [0.07, 1, 0.07, 5, 0.07]
    )

    with options_col:
        chosen_label = st.radio(
            "Choose the method of sentence split",
            ["fullstop & linebreak (Default)", "pySBD"],
            help="""
            At present, you can choose between 2 methods to split your text into sentences. 

            The fullstop & linebreak is naive and robust to noise, but has low accuracy.
            pySBD is more accurate, but more complex and less robust to noise.
            """,
        )

        # Translate the displayed label into the identifier passed on later.
        split_method = (
            "fullstop"
            if chosen_label == "fullstop & linebreak (Default)"
            else "pySBD"
        )

    with text_col:
        doc = st.text_area("Paste your text below", height=510)

        submit_button = st.form_submit_button(label="👍 Go to split!")


# Halt the script here until the form has been submitted.
form_submitted = bool(submit_button)
if not form_submitted:
    st.stop()

# Split the pasted document with the cached model objects.
keywords = run_segbot.generate(doc, model, fm, index, split_method)

st.markdown("## 🎈 Check & download results")
st.header("")

# Five columns: the outer two stay empty, the middle three each receive one
# download button.
_, csv_col, txt_col, json_col, _ = st.columns([2, 1.5, 1.5, 1.5, 2])
download_targets = (
    (csv_col, "Data.csv", "📥 Download (.csv)"),
    (txt_col, "Data.txt", "📥 Download (.txt)"),
    (json_col, "Data.json", "📥 Download (.json)"),
)
for target_col, file_name, button_label in download_targets:
    with target_col:
        # The return value is stored under the original name; nothing in the
        # visible script reads it.
        CSVButton2 = download_button(keywords, file_name, button_label)

st.header("")

# Show the result as a one-column table whose row numbers start at 1.
df = DataFrame(keywords)
if df.shape[1] == 0:
    # An empty result produces a frame with no columns. Assigning a single
    # column label to it would raise ValueError (length mismatch), so build
    # the empty 'Segment' column explicitly instead.
    df = DataFrame({'Segment': []})
else:
    df.index += 1
    df.columns = ['Segment']
# Echo the frame to the server log.
print(df)

st.table(df)