Spaces:
Sleeping
Sleeping
File size: 6,674 Bytes
f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 9eff11a f053717 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import gradio as gr
import pandas as pd
import os
import uuid
import datetime
import logging
from huggingface_hub import hf_hub_download, upload_file, list_repo_tree
from dotenv import load_dotenv
load_dotenv()
# Configuration
# Every setting is read from the environment (populated by load_dotenv() above
# when a .env file is present). os.getenv returns None for unset variables.

# Input: the dataset repo, the file inside it, and the columns holding the
# item ID and the two generations being compared.
HF_INPUT_DATASET = os.getenv("HF_INPUT_DATASET")
HF_INPUT_DATASET_PATH = os.getenv("HF_INPUT_DATASET_PATH")
HF_INPUT_DATASET_ID_COLUMN = os.getenv("HF_INPUT_DATASET_ID_COLUMN")
HF_INPUT_DATASET_COLUMN_A = os.getenv("HF_INPUT_DATASET_COLUMN_A")
HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")

# Output: the dataset repo and directory that per-labeler result files are
# uploaded to.
HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")

# Markdown shown at the top of the labeling page. The quoted choice names match
# the button captions and the judgment strings recorded for each vote.
INSTRUCTIONS = """
# Pairwise Model Output Labeling
Please compare the two model outputs shown below and select which one you think is better.
- Choose "A is better" if Model Output A (left) is superior
- Choose "B is better" if Model Output B (right) is superior
- Choose "Tie" if they are equally good or bad
- Choose "Can't choose" if you cannot make a determination
"""
class PairwiseLabeler:
    """Serve pairs of model outputs for labeling and persist each labeler's votes.

    The dataset is loaded once when the object is constructed. Votes are held
    in memory, keyed by labeler ID, and a labeler's complete vote list is
    re-uploaded to the output dataset repo after every judgment.
    """

    def __init__(self):
        self.df = self.read_hf_dataset()
        # Maps user_id -> list of vote records (dicts) submitted by that user.
        self.results = {}

    def __len__(self):
        """Return the number of pairs in the loaded dataset."""
        return len(self.df)

    def read_hf_dataset(self) -> pd.DataFrame:
        """Download the input dataset file and parse it according to its extension.

        Supports ``.json``, ``.jsonl``, ``.csv`` and ``.parquet``. If the
        download or parsing fails for any reason (including an unsupported
        extension), the failure is logged and a five-row sample frame is
        returned instead so the app can still start.
        """
        try:
            local_file = hf_hub_download(
                repo_id=HF_INPUT_DATASET,
                repo_type="dataset",
                filename=HF_INPUT_DATASET_PATH,
            )
            if local_file.endswith(".json"):
                return pd.read_json(local_file)
            if local_file.endswith(".jsonl"):
                return pd.read_json(local_file, orient="records", lines=True)
            if local_file.endswith(".csv"):
                return pd.read_csv(local_file)
            if local_file.endswith(".parquet"):
                return pd.read_parquet(local_file)
            raise ValueError(f"Unsupported file type: {local_file}")
        except Exception:
            # Deliberate best-effort fallback; logging.exception keeps the
            # traceback so the real cause is not lost.
            logging.exception(
                f"Couldn't read HF dataset from {HF_INPUT_DATASET_PATH}. Using sample data instead."
            )
            sample_data = {
                HF_INPUT_DATASET_ID_COLUMN: [f"sample_{i}" for i in range(5)],
                HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(5)],
                HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(5)],
            }
            return pd.DataFrame(sample_data)

    def get_current_pair(self, user_id, user_index):
        """Return ``(item_id, left_text, right_text)`` for the row at ``user_index``.

        Returns ``(None, None, None)`` once ``user_index`` is past the last
        row. ``user_id`` is accepted for symmetry with the other methods but is
        not used here. A missing ID column falls back to ``item_<index>`` and a
        missing text column to an empty string.
        """
        if user_index >= len(self.df):
            return None, None, None
        item = self.df.iloc[user_index]
        item_id = item.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{user_index}")
        left_text = item.get(HF_INPUT_DATASET_COLUMN_A, "")
        right_text = item.get(HF_INPUT_DATASET_COLUMN_B, "")
        return item_id, left_text, right_text

    def submit_judgment(self, user_id, user_index, item_id, left_text, right_text, choice):
        """Record ``choice`` for ``item_id`` and advance to the next pair.

        A repeat vote by the same user on the same item overwrites the earlier
        judgment and timestamp instead of adding a second record.

        Returns ``(next_item_id, next_left, next_right, next_index)``. When
        there is no current item, returns ``(None, None, None, user_index)``
        without recording anything. When ``user_id`` is empty, nothing is
        recorded and the current pair is returned unchanged.
        """
        # The UI passes the item ID through a text field, so "no item" may
        # arrive as an empty string as well as None.
        if item_id is None or (isinstance(item_id, str) and not item_id.strip()):
            return None, None, None, user_index
        # Without a labeler ID the vote cannot be attributed; recording it
        # would pool every anonymous vote under one shared key.
        if not user_id:
            return item_id, left_text, right_text, user_index

        user_votes = self.results.setdefault(user_id, [])
        timestamp = datetime.datetime.now().isoformat()

        # Check if the user already voted for this item.
        existing_vote = next((r for r in user_votes if r["item_id"] == item_id), None)
        if existing_vote is not None:
            existing_vote["judgment"] = choice
            existing_vote["timestamp"] = timestamp
        else:
            user_votes.append({
                "item_id": item_id,
                "generation_a": left_text,
                "generation_b": right_text,
                "judgment": choice,
                "timestamp": timestamp,
                "labeler_id": user_id,
            })

        # Save immediately so a crash loses at most the vote in flight.
        self.save_results(user_id)

        # Move to the next item.
        user_index += 1
        next_id, next_left, next_right = self.get_current_pair(user_id, user_index)
        return next_id, next_left, next_right, user_index

    @staticmethod
    def _results_filename(user_id) -> str:
        """Return the results file name for ``user_id``, safe to use as one path segment.

        The ID is typed in by the labeler, so it is percent-encoded: path
        separators cannot escape the output directory, and distinct IDs still
        map to distinct names. IDs made only of letters, digits and ``_.-~``
        are left unchanged.
        """
        from urllib.parse import quote

        return f"results_{quote(str(user_id), safe='')}.jsonl"

    def save_results(self, user_id):
        """Upload all of ``user_id``'s votes as a JSON-lines file to the output dataset.

        Does nothing when the user has no recorded votes. Upload errors are
        logged rather than raised so a failed save does not interrupt labeling.
        """
        votes = self.results.get(user_id)
        if not votes:
            return
        try:
            filename = self._results_filename(user_id)
            # Serialise in memory; no temporary file is left behind if the
            # upload fails.
            payload = pd.DataFrame(votes).to_json(orient="records", lines=True)
            # Repo paths always use "/" regardless of the host OS, and an unset
            # output directory means the repo root.
            repo_dir = (HF_OUTPUT_DATASET_DIR or "").strip("/")
            path_in_repo = f"{repo_dir}/{filename}" if repo_dir else filename
            # Push to Hugging Face Hub.
            upload_file(
                repo_id=HF_OUTPUT_DATASET,
                repo_type="dataset",
                path_in_repo=path_in_repo,
                path_or_fileobj=payload.encode("utf-8"),
            )
        except Exception as e:
            logging.exception(f"Error saving results: {e}")
# One shared labeler for the whole app; constructing it loads the dataset.
labeler = PairwiseLabeler()

# Gradio UI
with gr.Blocks() as app:
    gr.Markdown(INSTRUCTIONS)
    user_id = gr.Textbox(label="Enter your user ID", interactive=True)
    user_index = gr.State(0)  # Position of this user within the dataset

    # The two generations are shown side by side.
    with gr.Row():
        with gr.Column():
            left_output = gr.Textbox(label="Model Output A", lines=10, interactive=False)
        with gr.Column():
            right_output = gr.Textbox(label="Model Output B", lines=10, interactive=False)

    # Hidden field that carries the current item's ID between callbacks.
    item_id = gr.Textbox(visible=False)

    with gr.Row():
        left_btn = gr.Button("⬅️ A is better")
        right_btn = gr.Button("➡️ B is better")
        tie_btn = gr.Button("🤝 Tie")
        cant_choose_btn = gr.Button("🤔 Can't choose")

    def load_first_pair(user_id):
        """Return the first pair plus index 0, or blanks when no user ID was entered."""
        if user_id:
            first_pair = labeler.get_current_pair(user_id, 0)
            return (*first_pair, 0)
        return None, None, None, 0

    def judge(choice, user_id, user_index, item_id, left_text, right_text):
        """Forward a button press to the labeler and return the next pair and index."""
        return labeler.submit_judgment(
            user_id=user_id,
            user_index=user_index,
            item_id=item_id,
            left_text=left_text,
            right_text=right_text,
            choice=choice,
        )

    # Every callback refreshes the same four components.
    pair_outputs = [item_id, left_output, right_output, user_index]

    user_id.submit(load_first_pair, inputs=[user_id], outputs=pair_outputs)

    # Each button submits a fixed judgment string alongside the current UI state.
    for button, verdict in (
        (left_btn, "A is better"),
        (right_btn, "B is better"),
        (tie_btn, "Tie"),
        (cant_choose_btn, "Can't choose"),
    ):
        button.click(
            judge,
            inputs=[gr.State(verdict), user_id, user_index, item_id, left_output, right_output],
            outputs=pair_outputs,
        )

if __name__ == "__main__":
    app.launch()
|