Spaces:
Sleeping
Sleeping
File size: 2,952 Bytes
a578005 88424d7 a578005 88424d7 00b5c9c 88424d7 a578005 88424d7 00b5c9c 88424d7 00b5c9c 88424d7 a578005 2f36d79 a578005 2f36d79 a578005 88424d7 a578005 88424d7 a578005 9ad151d a578005 88424d7 00b5c9c 9ad151d 88424d7 2f36d79 88424d7 9ad151d a578005 88424d7 2f36d79 9ad151d 88424d7 2f36d79 88424d7 9ad151d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import streamlit as st
import pandas as pd
import re
# Page-level settings (tab icon/title, wide layout, collapsed sidebar); this is
# the first Streamlit call issued by the script.
st.set_page_config(
    page_title='MRC for Legal Document Dataset checker',
    page_icon='🍃',
    layout='wide',
    initial_sidebar_state="collapsed",
)
# start processing events
def load_data(file_uploader):
    """Load the uploaded CSV into a DataFrame.

    Args:
        file_uploader: A path or file-like object accepted by ``pd.read_csv``
            (here, the value returned by ``st.file_uploader``), or ``None``
            when nothing has been uploaded.

    Returns:
        The parsed DataFrame, or an empty DataFrame with the columns
        ``context``, ``question`` and ``answer`` when ``file_uploader`` is
        ``None``.
    """
    if file_uploader is None:
        return pd.DataFrame(columns=['context', 'question', 'answer'])
    # keep_default_na=False keeps empty cells as "" and literal texts such as
    # "N/A" or "null" as written. With the default NA handling they become NaN,
    # which a text widget shows as "nan" and which is then saved back as the
    # string "nan", corrupting the dataset.
    return pd.read_csv(file_uploader, keep_default_na=False)
def convert_df(df):
    """Serialize *df* to CSV and return it as UTF-8 encoded bytes.

    Args:
        df: The DataFrame to export.

    Returns:
        The CSV text (header row plus data rows, without the row index)
        encoded as UTF-8 ``bytes``.
    """
    # index=False: writing the row index would add an unnamed first column, so
    # a file that is downloaded and uploaded again would gain an extra
    # "Unnamed: 0" column on every round trip.
    # NOTE: this conversion is not cached; it runs each time it is called.
    return df.to_csv(index=False).encode("utf-8")
# end processing events
def _highlight_answer(context, answer):
    """Return *context* with each case-insensitive match of *answer* wrapped in <mark>."""
    # A callable replacement inserts the matched text verbatim. A replacement
    # *string* built from the answer would have its backslashes interpreted as
    # escapes (an answer containing "\1" raises re.error) and would overwrite
    # the context's own casing with the answer's casing.
    return re.sub(
        re.escape(answer),
        lambda match: "<mark>" + match.group(0) + "</mark>",
        context,
        flags=re.IGNORECASE,
    )


def _go_to_previous():
    """Button callback: move to the previous sample, stopping at the first one."""
    if st.session_state.idx > 0:
        st.session_state.idx -= 1


def _go_to_next():
    """Button callback: move to the next sample, stopping at the last one."""
    if st.session_state.idx < len(st.session_state.df) - 1:
        st.session_state.idx += 1


st.markdown("<h1 style='text-align: center;'>Investigation Legal Dataset checker for Machine Reading Comprehension</h1>", unsafe_allow_html=True)
file = st.file_uploader(label='Upload your file here:', type=['csv'], accept_multiple_files=False, label_visibility='hidden')

# Identify the current upload so the working copy is (re)loaded only when the
# uploaded file actually changes. Initialising the session copy just once would
# freeze it to the empty frame created before anything was uploaded, and
# re-reading the file on every rerun would discard saved edits.
if file is None:
    loaded_file_key = None
else:
    loaded_file_key = getattr(file, 'file_id', (file.name, file.size))

if (
    'df' not in st.session_state
    or 'idx' not in st.session_state
    or 'loaded_file_key' not in st.session_state
    or st.session_state.loaded_file_key != loaded_file_key
):
    st.session_state.df = load_data(file_uploader=file)
    st.session_state.idx = 0
    st.session_state.loaded_file_key = loaded_file_key

total = len(st.session_state.df)
st.markdown(f"<h3 style='text-align: center;'>Sample {st.session_state.idx + 1}/{total}</h3>", unsafe_allow_html=True)

# Ten equal columns keep the buttons narrow; only the first four are used.
col_1, col_2, col_3, col_4, *_ = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

# Navigation uses on_click callbacks: they run before the script is re-executed,
# so the header and the text areas already show the newly selected sample
# instead of lagging one click behind.
col_1.button(label=':arrow_backward: Previous sample', use_container_width=True, on_click=_go_to_previous)
col_2.button(label='Next sample :arrow_forward:', use_container_width=True, on_click=_go_to_next)
btn_save = col_3.button(label=':heavy_check_mark: Save change', use_container_width=True)
# TODO: add a "go to sample" selector in one of the spare columns.

if total != 0:
    index = st.session_state.idx
    txt_context = st.text_area(height=300, label='Your context:', value=st.session_state.df.at[index, 'context'])
    txt_question = st.text_area(height=100, label='Your question:', value=st.session_state.df.at[index, 'question'])
    txt_answer = st.text_area(height=100, label='Your answer:', value=st.session_state.df.at[index, 'answer'])

    if txt_answer.strip() and txt_context.strip():
        # NOTE(review): the context is rendered with unsafe_allow_html=True, so
        # any HTML present in the uploaded CSV is rendered as-is - confirm the
        # uploaded files are trusted.
        st.markdown(_highlight_answer(txt_context, txt_answer), unsafe_allow_html=True)

    if btn_save:
        # .at writes straight into the session DataFrame; chained assignment
        # (df[col][index] = value) may modify a temporary copy and is a silent
        # no-op under pandas copy-on-write.
        st.session_state.df.at[index, 'context'] = txt_context
        st.session_state.df.at[index, 'question'] = txt_question
        st.session_state.df.at[index, 'answer'] = txt_answer
        # Offer the updated dataset for download right after a save.
        csv_file = convert_df(df=st.session_state.df)
        col_4.download_button(data=csv_file, label=':arrow_down_small: Download file', use_container_width=True, file_name="large_df.csv", mime="text/csv")