File size: 3,860 Bytes
a578005
80f304b
a578005
 
88424d7
a578005
80f304b
00b5c9c
5c7750f
a578005
 
2f36d79
a578005
80f304b
a578005
 
 
88424d7
 
 
5c7750f
 
a578005
88424d7
5c7750f
 
80f304b
 
a578005
5c7750f
 
 
 
 
 
 
 
 
88424d7
 
00b5c9c
 
80f304b
 
 
 
 
 
9ad151d
88424d7
80f304b
88424d7
80f304b
 
 
 
88424d7
80f304b
 
 
5c7750f
 
 
 
20be6f0
5c7750f
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
import streamlit as st
import re

# ---------------------------------------------------------------------------
# Page chrome: browser-tab metadata and the centred title banner.
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title='MRC for Legal Document Dataset checker',
    page_icon='🍃',
    layout='wide',
    initial_sidebar_state="collapsed",
)
st.markdown(
    "<h2 style='text-align: center;'>Investigation Legal Dataset checker for Machine Reading Comprehension</h2>",
    unsafe_allow_html=True,
)

# The dataset is loaded from disk each time the script runs.
df = pd.read_csv('./Legal_AbstractiveA.csv')

# `idx` is the zero-based position of the sample currently being reviewed;
# it lives in session state so it persists between script runs.
if 'idx' not in st.session_state:
    st.session_state['idx'] = 0

# Header showing the one-based position of the current sample.
current_sample = st.session_state.idx + 1
total_samples = len(df)
st.markdown(
    f"<h4 style='text-align: center;'>Sample {current_sample}/{total_samples}</h4>",
    unsafe_allow_html=True,
)

# Toolbar: ten equally weighted columns. Columns 1-3, 5 and 6 are filled
# here; `col_4` is used later in the script for the download button.
(
    col_1, col_2, col_3, col_4, col_5,
    col_6, col_7, col_8, col_9, col_10,
) = st.columns([1] * 10)

btn_previous = col_1.button(
    ':arrow_backward: Previous sample',
    use_container_width=True,
)
btn_next = col_2.button(
    'Next sample :arrow_forward:',
    use_container_width=True,
)
btn_save = col_3.button(
    ':heavy_check_mark: Save change',
    use_container_width=True,
)
# One-based sample number to jump to; the label is hidden to save space.
txt_goto = col_5.selectbox(
    'Sample',
    options=list(range(1, total_samples + 1)),
    label_visibility='collapsed',
)
btn_goto = col_6.button(
    ':fast_forward: Move to',
    use_container_width=True,
)

def _cell_text(frame, column, row):
    """Return the cell at (row, column) as text, mapping missing values to ''."""
    value = frame.loc[row, column]
    return '' if pd.isna(value) else str(value)


def _criteria_index(frame, column, row, choices):
    """Return the position in *choices* of the stored rating.

    Falls back to 0 (the first choice) when the column does not exist yet,
    or when the stored value is missing or not one of *choices*.
    """
    if column not in frame.columns:
        return 0
    stored = frame.loc[row, column]
    return choices.index(stored) if stored in choices else 0


# Review form and button handlers. Relies on `df`, `col_4`, `txt_goto` and the
# `btn_*` flags created earlier in the script.
if len(df) > 0:
    row = st.session_state.idx

    col_x1, col_x2 = st.columns([8.5, 1.5])
    txt_context = col_x1.text_area(height=300, label='Your context:', value=_cell_text(df, 'context', row))
    txt_question = st.text_area(height=100, label='Your question:', value=_cell_text(df, 'question', row))
    txt_answer = st.text_area(height=100, label='Your answer:', value=_cell_text(df, 'answer', row))

    # The first option doubles as the default for rows that were never rated.
    options = ['Never been evaluated', 'Bad', 'Acceptable', 'Good']

    criteria_1 = col_x2.selectbox(
        label='Are the questions natural, comprehensive, and appropriate to the content?',
        options=options,
        index=_criteria_index(df, 'criteria_1', row, options),
    )
    criteria_2 = col_x2.selectbox(
        label='Is the answer correct, clear, and fluent?',
        options=options,
        index=_criteria_index(df, 'criteria_2', row, options),
    )
    criteria_3 = col_x2.selectbox(
        label='Do the question and answer pairs match each other?',
        options=options,
        index=_criteria_index(df, 'criteria_3', row, options),
    )

    if txt_answer.strip() and txt_context.strip():
        # A callable replacement is used so that backslashes or group
        # references inside the answer are never interpreted as a regex
        # template; it also keeps the context's own casing in the highlight.
        highlighted_context = re.sub(
            re.escape(txt_answer),
            lambda match: "<mark>" + match.group(0) + "</mark>",
            txt_context,
            flags=re.IGNORECASE,
        )
        st.markdown(highlighted_context, unsafe_allow_html=True)

    if btn_previous and st.session_state.idx > 0:
        st.session_state.idx -= 1
        st.rerun()

    # Strictly less than the last valid position, so the index can never
    # step past the end of the dataset.
    if btn_next and st.session_state.idx < len(df) - 1:
        st.session_state.idx += 1
        st.rerun()

    if btn_save:
        edited_values = {
            'context': txt_context,
            'question': txt_question,
            'answer': txt_answer,
            'criteria_1': criteria_1,
            'criteria_2': criteria_2,
            'criteria_3': criteria_3,
        }
        # Single-step `.loc` assignment writes into `df` itself (chained
        # `df[col][row] = ...` may write to a temporary copy) and creates the
        # criteria columns the first time a rating is saved.
        for column, value in edited_values.items():
            df.loc[row, column] = value

        # Persist to disk first, then offer the same content (no index
        # column in either) as a download.
        df.to_csv(path_or_buf='./Legal_AbstractiveA.csv', index=False)
        btn_download = col_4.download_button(
            data=df.to_csv(index=False),
            label=':arrow_down_small: Download file',
            use_container_width=True,
            file_name="checked.csv",
            mime="text/csv",
        )

    if btn_goto and txt_goto is not None:
        # The selector is one-based; the session index is zero-based.
        st.session_state.idx = txt_goto - 1
        st.rerun()