parser-leaderboard / tabs /arena_sxs.py
jojortz's picture
add SustainabilityReport sample
da8f0c9
import gradio as gr
from utils.pdf_utils import update_page, load_pdf
from extractors.model_runner import models, run_extract_parallel
# Bundled sample PDFs offered in the "Choose a Sample PDF" radio.
# Keys are the labels shown to the user; values are the PDF paths that are
# handed to the page viewer and to the parsers.
sample_files = {
    "Portfolio Statement": "data/SamplePortfolioStatement.pdf",
    "Sustainability Report": "data/SampleSustainabilityReport.pdf",
    "Table Of Contents": "data/TableOfContents.pdf",
}
def update_dropdowns(model_a_choice, model_b_choice):
    """Build the update for the second model dropdown.

    The offered choices are every entry of ``models`` except the one picked
    for model A. If model B currently equals model A, the selection falls
    back to the first remaining entry (or ``None`` when nothing remains);
    otherwise the current model B selection is kept.

    Args:
        model_a_choice: Value currently selected for model A.
        model_b_choice: Value currently selected for model B.

    Returns:
        The ``gr.update(...)`` result carrying the new choices and value.
    """
    remaining = [candidate for candidate in models if candidate != model_a_choice]

    # Distinct selections: only the choice list needs refreshing.
    if model_a_choice != model_b_choice:
        return gr.update(choices=remaining, value=model_b_choice)

    # Same model picked twice: move B to the first alternative, if any.
    fallback = remaining[0] if remaining else None
    return gr.update(choices=remaining, value=fallback)
def sync_models(model_a_choice, model_b_choice):
    """Return the model B dropdown update for the given pair of selections.

    Thin wrapper that forwards both arguments unchanged to
    ``update_dropdowns`` and returns its result.
    """
    dropdown_update = update_dropdowns(model_a_choice, model_b_choice)
    return dropdown_update
def dummy_function_a(model_a_choice):
    """Return a placeholder message naming the model chosen for slot A."""
    message = f"Model A selected: {model_a_choice}"
    return message
def dummy_function_b(model_b_choice):
    """Return a placeholder message naming the model chosen for slot B."""
    message = f"Model B selected: {model_b_choice}"
    return message
def update_button(file):
    """Return an update that makes a button interactive only when ``file`` is truthy.

    Args:
        file: Selected file (or path); any falsy value, such as ``None`` or
            an empty string, disables the button.
    """
    has_file = bool(file)
    return gr.update(interactive=has_file)
def update_vote_button(output):
    """Return four button updates whose interactivity follows ``output``.

    Args:
        output: Current content of an output pane; truthy content enables
            the buttons, empty content disables them.

    Returns:
        A list of four ``gr.update(...)`` results, all with the same
        ``interactive`` flag.
    """
    enabled = bool(output)
    updates = []
    for _ in range(4):
        updates.append(gr.update(interactive=enabled))
    return updates
def clear_outputs():
    """Return a pair of empty strings, one for each output pane."""
    blank = ""
    return blank, blank
def load_selected_pdf(file_path):
    """Load a PDF through ``load_pdf`` and return its three results as a tuple.

    Args:
        file_path: Path of the PDF to load.

    Returns:
        ``(image, page_info, page_number)`` exactly as unpacked from
        ``load_pdf(file_path)``.
    """
    image, info, number = load_pdf(file_path)
    return image, info, number
def arena_sxs():
    """Build the side-by-side arena tab.

    The tab lets the user pick a sample PDF or upload one, page through a
    preview of it, choose two models, and parse the file with both so the
    results appear next to each other.

    Returns:
        The ``gr.Blocks`` container holding the tab's components and events.
    """
    with gr.Blocks() as arena_block:
        # NOTE(review): the Row/Column nesting below was inferred from the
        # statement order; confirm it matches the intended layout.
        gr.Markdown("# Rules")
        gr.Markdown("- Choose a Sample PDF or upload your own to parse with two chosen models (e.g., Llama, Unstructured, ChatGPT, Claude) and vote for the better one!")
        gr.Markdown("- You can parse multiple files until you identify a winner.")
        gr.Markdown("## 1. Upload a file or choose a sample.")
        gr.Markdown("Only PDF files supported.")

        with gr.Row():
            with gr.Column(scale=2):
                with gr.Row():
                    pdf_radio = gr.Radio(choices=list(sample_files.keys()), label="Choose a Sample PDF")
                    pdf_file = gr.File(type="filepath", label="Upload PDF", file_types=[".pdf"])
                pdf_image = gr.Image(label="PDF Page")
                page_info = gr.Textbox(label="")
                current_page = gr.State(value=0)
                file_path_state = gr.State(value=None)  # Unified file path state
                with gr.Row():
                    prev_button = gr.Button("Previous")
                    next_button = gr.Button("Next")

        gr.Markdown("---")
        gr.Markdown("## 2. Choose two models to compare")
        with gr.Blocks():
            with gr.Row():
                model_a = gr.Dropdown(choices=models, value=models[0], label="")
                model_b = gr.Dropdown(choices=[m for m in models if m != models[0]], value=models[1], label="")
            with gr.Row():
                output_a = gr.Markdown(height=400)
                output_b = gr.Markdown(height=400)
            with gr.Row():
                extract_button = gr.Button("Parse", interactive=False)

        # Handlers
        def update_pdf_from_source(file_path):
            """Load ``file_path`` via ``load_pdf`` and return its three results."""
            img, info, page_num = load_pdf(file_path)
            return img, info, page_num

        def update_pdf_and_button(file=None, radio=None):
            """Resolve the active PDF and refresh the preview, state and Parse button.

            An uploaded ``file`` takes precedence; otherwise ``radio`` is looked
            up in ``sample_files``. Returns values for, in order: the page
            image, the page-info text, the current page (always reset to 0),
            the unified file path, and the Parse button update.
            """
            file_path = file if file else sample_files.get(radio)
            if not file_path:
                return None, "No file selected", 0, None, update_button(None)
            # Local names are kept distinct from the ``page_info`` Textbox above.
            img, info, _ = update_pdf_from_source(file_path)
            return img, info, 0, file_path, update_button(file_path)

        def handle_file_change(file):
            """React to a change of the upload widget."""
            return update_pdf_and_button(file=file)

        def handle_radio_change(radio):
            """React to a change of the sample-PDF radio."""
            return update_pdf_and_button(radio=radio)

        # Handle file uploads
        pdf_file.change(
            fn=handle_file_change,
            inputs=pdf_file,
            outputs=[pdf_image, page_info, current_page, file_path_state, extract_button],
        )

        # Handle sample PDF selection from the radio
        pdf_radio.change(
            fn=handle_radio_change,
            inputs=pdf_radio,
            outputs=[pdf_image, page_info, current_page, file_path_state, extract_button],
        )

        # Page navigation: step one page back or forward in the loaded PDF.
        prev_button.click(
            fn=lambda file_path, page: update_page(file_path, page, -1),
            inputs=[file_path_state, current_page],
            outputs=[pdf_image, page_info, current_page],
        )
        next_button.click(
            fn=lambda file_path, page: update_page(file_path, page, 1),
            inputs=[file_path_state, current_page],
            outputs=[pdf_image, page_info, current_page],
        )

        # Clear both panes first, then parse. Chaining with .then() fixes the
        # order; two separate click listeners writing to the same outputs had
        # no ordering guarantee, so the clear could overwrite fresh results.
        extract_button.click(
            fn=clear_outputs,
            outputs=[output_a, output_b],
        ).then(
            fn=run_extract_parallel,
            inputs=[model_a, model_b, file_path_state],  # Use the unified file path state
            outputs=[output_a, output_b],
        )

    return arena_block
# with gr.Row():
# vote_model_a_button = gr.Button("👈 A is better", interactive=False)
# vote_model_b_button = gr.Button("👉 B is better", interactive=False)
# vote_tie_button = gr.Button("🤝 Tie", interactive=False)
# vote_bad_button = gr.Button("👎 Both are bad", interactive=False)
# output_a.change(fn=update_vote_button, inputs=output_a, outputs=[vote_model_a_button, vote_model_b_button, vote_tie_button, vote_bad_button])
# output_b.change(fn=update_vote_button, inputs=output_b, outputs=[vote_model_a_button, vote_model_b_button, vote_tie_button, vote_bad_button])
# # Button Handlers
# vote_model_a_button.click(fn=lambda model_a, model_b: vote_for_model(model_a, Vote.GOOD, model_b, Vote.NEUTRAL), inputs=[model_a, model_b], outputs=[output_a, output_b])
# vote_model_b_button.click(fn=lambda model_a, model_b: vote_for_model(model_a, Vote.NEUTRAL, model_b, Vote.GOOD), inputs=[model_a, model_b], outputs=[output_a, output_b])
# vote_tie_button.click(fn=lambda model_a, model_b: vote_for_model(model_a, Vote.NEUTRAL, model_b, Vote.NEUTRAL), inputs=[model_a, model_b], outputs=[output_a, output_b])
# vote_bad_button.click(fn=lambda model_a, model_b: vote_for_model(model_a, Vote.BAD, model_b, Vote.BAD), inputs=[model_a, model_b], outputs=[output_a, output_b])