labeling-summarization

Sleeping

App Files Files Community

saridormi committed on Mar 26

Commit

f053717

1 Parent(s): ff17709

initial commit

Browse files

Files changed (4) hide show

.env +8 -0
.gitignore +39 -0
app.py +209 -0
requirements.txt +5 -0

.env ADDED Viewed

	@@ -0,0 +1,8 @@

+#HF_TOKEN=??? <-- also set this secret in HF space
+HF_INPUT_DATASET="JetBrains-Research/lca-results"
+HF_INPUT_DATASET_PATH="commit_message_generation/predictions/o1-preview-2024-09-12/predictions.jsonl"
+HF_INPUT_DATASET_ID_COLUMN="hash"
+HF_INPUT_DATASET_COLUMN_A="reference"
+HF_INPUT_DATASET_COLUMN_B="prediction"
+HF_OUTPUT_DATASET="saridormi/labels"
+HF_OUTPUT_DATASET_DIR="cmg"

.gitignore ADDED Viewed

	@@ -0,0 +1,39 @@

+# Virtual Environment
+.venv/
+venv/
+ENV/
+# VS Code
+.vscode/*
+!.vscode/launch.json
+!.vscode/settings.json
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Logs
+logs/
+*.log
+# Local results
+results/
+temp.jsonl

app.py ADDED Viewed

	@@ -0,0 +1,209 @@

+import gradio as gr
+import pandas as pd
+import os
+import uuid
+import datetime
+import logging
+from huggingface_hub import hf_hub_download, upload_file, list_repo_tree
+from dotenv import load_dotenv
+load_dotenv()
+# Configuration
+HF_INPUT_DATASET = os.getenv("HF_INPUT_DATASET")
+HF_INPUT_DATASET_PATH = os.getenv("HF_INPUT_DATASET_PATH")
+HF_INPUT_DATASET_ID_COLUMN = os.getenv("HF_INPUT_DATASET_ID_COLUMN")
+HF_INPUT_DATASET_COLUMN_A = os.getenv("HF_INPUT_DATASET_COLUMN_A")
+HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")
+HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
+HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")
+INSTRUCTIONS = """
+# Pairwise Model Output Labeling
+Please compare the two model outputs shown below and select which one you think is better.
+- Choose "Left is better" if the left output is superior
+- Choose "Right is better" if the right output is superior
+- Choose "Tie" if they are equally good or bad
+- Choose "Can't choose" if you cannot make a determination
+"""
+SAVE_EVERY_N_EXAMPLES = 5
+class PairwiseLabeler:
+    def __init__(self):
+        self.current_index = 0
+        self.results = []
+        self.df = self.read_hf_dataset()
+    def __len__(self):
+        return len(self.df)
+    def read_hf_dataset(self) -> pd.DataFrame:
+        try:
+            local_file = hf_hub_download(repo_id=HF_INPUT_DATASET, repo_type="dataset", filename=HF_INPUT_DATASET_PATH)
+            if local_file.endswith(".json"):
+                return pd.read_json(local_file)
+            elif local_file.endswith(".jsonl"):
+                return pd.read_json(local_file, orient="records",lines=True)
+            elif local_file.endswith(".csv"):
+                return pd.read_csv(local_file)
+            elif local_file.endswith(".parquet"):
+                return pd.read_parquet(local_file)
+            else:
+                raise ValueError(f"Unsupported file type: {local_file}")
+        except Exception as e:
+            # Fallback to sample data if loading fails
+            logging.error(f"Couldn't read HF dataset from {HF_INPUT_DATASET_PATH}. Using sample data instead.")
+            sample_data = {
+                HF_INPUT_DATASET_ID_COLUMN: [f"sample_{i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
+                HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
+                HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
+            }
+            return pd.DataFrame(sample_data)
+    def get_current_pair(self):
+        if self.current_index >= len(self.df):
+            return None, None, None
+        item = self.df.iloc[self.current_index]
+        item_id = item.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{self.current_index}")
+        left_text = item.get(HF_INPUT_DATASET_COLUMN_A, "")
+        right_text = item.get(HF_INPUT_DATASET_COLUMN_B, "")
+        return item_id, left_text, right_text
+    def submit_judgment(self, item_id, left_text, right_text, choice):
+        if item_id is None:
+            return item_id, left_text, right_text, self.current_index
+        # Record the judgment
+        result = {
+            "item_id": item_id,
+            "generation_a": left_text,
+            "generation_b": right_text,
+            "judgment": choice,
+            "timestamp": datetime.datetime.now().isoformat(),
+            "labeler_id": str(uuid.uuid4())[:8]  # Anonymous ID for the labeling session
+        }
+        self.results.append(result)
+        # Move to next item
+        self.current_index += 1
+        # Save results periodically
+        if len(self.results) % SAVE_EVERY_N_EXAMPLES == 0:
+            self.save_results()
+        # Get next pair
+        next_id, next_left, next_right = self.get_current_pair()
+        return next_id, next_left, next_right, self.current_index
+    def save_results(self):
+        if not self.results:
+            return
+        try:
+            # Convert results to dataset format
+            results_df = pd.DataFrame(self.results)
+            results_df.to_json("temp.jsonl", orient="records", lines=True)
+            # Push to Hugging Face Hub
+            try:
+                num_files = len([_ for _ in list_repo_tree(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=HF_OUTPUT_DATASET_DIR)])
+            except Exception as e:
+                num_files = 0
+            upload_file(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=os.path.join(HF_OUTPUT_DATASET_DIR, f"results_{num_files+1}.jsonl"), path_or_fileobj="temp.jsonl")
+            os.remove("temp.jsonl")
+            self.results = []
+            logging.info(f"Saved {len(self.results)} results to {HF_OUTPUT_DATASET}")
+        except Exception as e:
+            logging.error(f"Error saving results: {e}")
+# Initialize the labeler
+labeler = PairwiseLabeler()
+# Get the first pair
+initial_id, initial_left, initial_right = labeler.get_current_pair()
+with gr.Blocks() as app:
+    gr.Markdown(INSTRUCTIONS)
+    with gr.Row():
+        with gr.Column():
+            left_output = gr.Textbox(
+                value=initial_left,
+                label="Model Output A",
+                lines=10,
+                interactive=False
+            )
+        with gr.Column():
+            right_output = gr.Textbox(
+                value=initial_right,
+                label="Model Output B",
+                lines=10,
+                interactive=False
+            )
+    item_id = gr.Textbox(value=initial_id, visible=False)
+    with gr.Row():
+        left_btn = gr.Button("⬅️ A is better", variant="primary")
+        right_btn = gr.Button("➡️ B is better", variant="primary")
+        tie_btn = gr.Button("🤝 Tie", variant="primary")
+        cant_choose_btn = gr.Button("🤔 Can't choose")
+    current_sample_sld = gr.Slider(minimum=0, maximum=len(labeler), step=1,
+                                   value=labeler.current_index,
+                                   interactive=False,
+                                   label='sample_ind',
+                                   info=f"Samples labeled (out of {len(labeler)})",
+                                   show_label=False,
+                                   container=False,
+                                   scale=5)
+    def judge_left(item_id, left_text, right_text):
+        return judge("A is better", item_id, left_text, right_text)
+    def judge_right(item_id, left_text, right_text):
+        return judge("B is better", item_id, left_text, right_text)
+    def judge_tie(item_id, left_text, right_text):
+        return judge("Tie", item_id, left_text, right_text)
+    def judge_cant_choose(item_id, left_text, right_text):
+        return judge("Can't choose", item_id, left_text, right_text)
+    def judge(choice, item_id, left_text, right_text):
+        new_id, new_left, new_right, new_index = labeler.submit_judgment(
+            item_id, left_text, right_text, choice
+        )
+        return new_id, new_left, new_right, new_index
+    left_btn.click(
+        judge_left,
+        inputs=[item_id, left_output, right_output],
+        outputs=[item_id, left_output, right_output, current_sample_sld]
+    )
+    right_btn.click(
+        judge_right,
+        inputs=[item_id, left_output, right_output],
+        outputs=[item_id, left_output, right_output, current_sample_sld]
+    )
+    tie_btn.click(
+        judge_tie,
+        inputs=[item_id, left_output, right_output],
+        outputs=[item_id, left_output, right_output, current_sample_sld]
+    )
+    cant_choose_btn.click(
+        judge_cant_choose,
+        inputs=[item_id, left_output, right_output],
+        outputs=[item_id, left_output, right_output, current_sample_sld]
+    )
+if __name__ == "__main__":
+    app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio>=4.0.0
+pandas>=1.3.0
+datasets>=2.0.0
+huggingface-hub>=0.12.0
+python-dotenv>=1.0.0