labeling-summarization

Sleeping

File size: 6,674 Bytes

import gradio as gr
import pandas as pd
import os
import uuid
import datetime
import logging
from huggingface_hub import hf_hub_download, upload_file, list_repo_tree
from dotenv import load_dotenv

load_dotenv()

# Configuration
HF_INPUT_DATASET = os.getenv("HF_INPUT_DATASET")
HF_INPUT_DATASET_PATH = os.getenv("HF_INPUT_DATASET_PATH")
HF_INPUT_DATASET_ID_COLUMN = os.getenv("HF_INPUT_DATASET_ID_COLUMN")
HF_INPUT_DATASET_COLUMN_A = os.getenv("HF_INPUT_DATASET_COLUMN_A")
HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")
HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")

INSTRUCTIONS = """
# Pairwise Model Output Labeling
Please compare the two model outputs shown below and select which one you think is better.
- Choose "Left is better" if the left output is superior
- Choose "Right is better" if the right output is superior
- Choose "Tie" if they are equally good or bad
- Choose "Can't choose" if you cannot make a determination
"""
 
class PairwiseLabeler:
    def __init__(self):
        self.df = self.read_hf_dataset()
        self.results = {}
    
    def __len__(self):
        return len(self.df)

    def read_hf_dataset(self) -> pd.DataFrame:
        try:
            local_file = hf_hub_download(repo_id=HF_INPUT_DATASET, repo_type="dataset", filename=HF_INPUT_DATASET_PATH)
            if local_file.endswith(".json"):
                return pd.read_json(local_file)
            elif local_file.endswith(".jsonl"):
                return pd.read_json(local_file, orient="records", lines=True)
            elif local_file.endswith(".csv"):
                return pd.read_csv(local_file)
            elif local_file.endswith(".parquet"):
                return pd.read_parquet(local_file)
            else:
                raise ValueError(f"Unsupported file type: {local_file}")
        except Exception as e:
            logging.error(f"Couldn't read HF dataset from {HF_INPUT_DATASET_PATH}. Using sample data instead.")
            sample_data = {
                HF_INPUT_DATASET_ID_COLUMN: [f"sample_{i}" for i in range(5)],
                HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(5)],
                HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(5)],
            }
            return pd.DataFrame(sample_data)

    def get_current_pair(self, user_id, user_index):
        if user_index >= len(self.df):
            return None, None, None

        item = self.df.iloc[user_index]
        item_id = item.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{user_index}")
        left_text = item.get(HF_INPUT_DATASET_COLUMN_A, "")
        right_text = item.get(HF_INPUT_DATASET_COLUMN_B, "")
        
        return item_id, left_text, right_text

    def submit_judgment(self, user_id, user_index, item_id, left_text, right_text, choice):
        if item_id is None:
            return None, None, None, user_index
        
        # Store user votes uniquely
        if user_id not in self.results:
            self.results[user_id] = []

        # Check if user already voted for this item
        existing_vote = next((r for r in self.results[user_id] if r["item_id"] == item_id), None)
        
        if existing_vote:
            existing_vote["judgment"] = choice
            existing_vote["timestamp"] = datetime.datetime.now().isoformat()
        else:
            self.results[user_id].append({
                "item_id": item_id,
                "generation_a": left_text,
                "generation_b": right_text,
                "judgment": choice,
                "timestamp": datetime.datetime.now().isoformat(),
                "labeler_id": user_id
            })
        
        # Save immediately
        self.save_results(user_id)

        # Move to the next item
        user_index += 1
        next_id, next_left, next_right = self.get_current_pair(user_id, user_index)
        return next_id, next_left, next_right, user_index

    def save_results(self, user_id):
        if user_id not in self.results or not self.results[user_id]:
            return

        try:
            results_df = pd.DataFrame(self.results[user_id])
            filename = f"results_{user_id}.jsonl"
            results_df.to_json(filename, orient="records", lines=True)

            # Push to Hugging Face Hub
            upload_file(repo_id=HF_OUTPUT_DATASET, repo_type="dataset",
                        path_in_repo=os.path.join(HF_OUTPUT_DATASET_DIR, filename),
                        path_or_fileobj=filename)

            os.remove(filename)
        except Exception as e:
            logging.error(f"Error saving results: {e}")

# Initialize the labeler
labeler = PairwiseLabeler()

# Gradio UI
with gr.Blocks() as app:
    gr.Markdown(INSTRUCTIONS)

    user_id = gr.Textbox(label="Enter your user ID", interactive=True)
    user_index = gr.State(0)  # Track each user's progress

    with gr.Row():
        with gr.Column():
            left_output = gr.Textbox(label="Model Output A", lines=10, interactive=False)
        with gr.Column():
            right_output = gr.Textbox(label="Model Output B", lines=10, interactive=False)
    
    item_id = gr.Textbox(visible=False)
    
    with gr.Row():
        left_btn = gr.Button("⬅️ A is better")
        right_btn = gr.Button("➡️ B is better")
        tie_btn = gr.Button("🤝 Tie")
        cant_choose_btn = gr.Button("🤔 Can't choose")
    
    def load_first_pair(user_id):
        if not user_id:
            return None, None, None, 0
        return labeler.get_current_pair(user_id, 0) + (0,)

    def judge(choice, user_id, user_index, item_id, left_text, right_text):
        return labeler.submit_judgment(user_id, user_index, item_id, left_text, right_text, choice)

    user_id.submit(load_first_pair, inputs=[user_id], outputs=[item_id, left_output, right_output, user_index])
    left_btn.click(judge, inputs=[gr.State("A is better"), user_id, user_index, item_id, left_output, right_output], outputs=[item_id, left_output, right_output, user_index])
    right_btn.click(judge, inputs=[gr.State("B is better"), user_id, user_index, item_id, left_output, right_output], outputs=[item_id, left_output, right_output, user_index])
    tie_btn.click(judge, inputs=[gr.State("Tie"), user_id, user_index, item_id, left_output, right_output], outputs=[item_id, left_output, right_output, user_index])
    cant_choose_btn.click(judge, inputs=[gr.State("Can't choose"), user_id, user_index, item_id, left_output, right_output], outputs=[item_id, left_output, right_output, user_index])

if __name__ == "__main__":
    app.launch()