Spaces:

ntphuc149
/

QA_Data_Validator

Sleeping

Truong-Phuc Nguyen

Update app.py

462049e verified 11 months ago

1.74 kB

	import streamlit as st
	import pandas as pd
	import re

	# Request Streamlit's 'wide' page layout for this app.
	st.set_page_config(layout='wide')

	def load_data(path: str = './data.csv') -> pd.DataFrame:
	    """Load the QA samples from a CSV file into a DataFrame.

	    Args:
	        path: Location of the CSV file to read. Defaults to ``./data.csv``.

	    Returns:
	        A DataFrame holding whatever columns the CSV header declares.

	    Raises:
	        FileNotFoundError: If no file exists at ``path``.
	    """
	    return pd.read_csv(filepath_or_buffer=path)

	# Load the dataset; load_data() is uncached, so this reads the CSV each time
	# the script executes.
	df = load_data()

	# No position stored for this session yet: start at the first sample.
	if 'idx' not in st.session_state:
	    st.session_state['idx'] = 0

	# Page title, centred with inline HTML (hence unsafe_allow_html).
	st.markdown("<h1 style='text-align: center;'>Investigation Legal Documents Dataset Checker</h1>", unsafe_allow_html=True)

	# Ten equal-width columns keep each button at a tenth of the page width.
	# Only the first three columns hold a button, so the remaining seven
	# handles are discarded instead of being bound to unused names.
	col_1, col_2, col_3, *_ = st.columns(10)
	btn_prev = col_1.button(label='Previous sample', use_container_width=True)
	btn_next = col_2.button(label='Next sample', use_container_width=True)
	btn_save = col_3.button(label='Save changes', use_container_width=True)

	# Step backwards or forwards through the samples, never leaving the range
	# 0 .. len(df) - 1.
	if btn_prev and st.session_state.idx > 0:
	    st.session_state.idx -= 1

	if btn_next and st.session_state.idx < len(df) - 1:
	    st.session_state.idx += 1

	# 1-based position of the current sample within the dataset.
	st.markdown(f"<h3 style='text-align: center;'>Sample: {st.session_state.idx+1}/{len(df)}</h3>", unsafe_allow_html=True)

	# Editable fields, pre-filled from the row the session currently points at.
	row = st.session_state.idx
	context = st.text_area(label='Your context: ', value=df.at[row, 'contexts'], height=300)
	question = st.text_area(label='Your question: ', value=df.at[row, 'questions'], height=100)
	answer = st.text_area(label='Your answer: ', value=df.at[row, 'answers'], height=100)

	# Show the context with every case-insensitive occurrence of the answer
	# wrapped in <mark> so the annotator can see where the answer sits.
	if answer.strip() and context.strip():
	    # The replacement is a callable, not a string: a replacement *string* is
	    # treated as a template, so an answer containing a backslash (e.g. "\d"
	    # or "\1") would raise re.error or inject a group reference. Wrapping
	    # match.group(0) also keeps the context's own casing instead of
	    # overwriting it with the casing typed into the answer box.
	    highlighted_context = re.sub(
	        re.escape(answer),
	        lambda match: f"<mark>{match.group(0)}</mark>",
	        context,
	        flags=re.IGNORECASE,
	    )
	    # NOTE(review): the context is rendered with unsafe_allow_html=True, so
	    # any HTML already present in the CSV text is rendered as markup too.
	    st.markdown(highlighted_context, unsafe_allow_html=True)

	# On "Save changes", copy the three edited fields into the current row and
	# write the whole dataset back to the CSV it was loaded from.
	if btn_save:
	    row = st.session_state.idx
	    edited = {'contexts': context, 'questions': question, 'answers': answer}
	    for column, text in edited.items():
	        df.loc[row, column] = text

	    df.to_csv('./data.csv', index=False)