saridormi committed
Commit f053717
1 Parent(s): ff17709

initial commit

Files changed (4)
  1. .env +8 -0
  2. .gitignore +39 -0
  3. app.py +209 -0
  4. requirements.txt +5 -0
.env ADDED
@@ -0,0 +1,8 @@
+ #HF_TOKEN=??? <-- also set this secret in HF space
+ HF_INPUT_DATASET="JetBrains-Research/lca-results"
+ HF_INPUT_DATASET_PATH="commit_message_generation/predictions/o1-preview-2024-09-12/predictions.jsonl"
+ HF_INPUT_DATASET_ID_COLUMN="hash"
+ HF_INPUT_DATASET_COLUMN_A="reference"
+ HF_INPUT_DATASET_COLUMN_B="prediction"
+ HF_OUTPUT_DATASET="saridormi/labels"
+ HF_OUTPUT_DATASET_DIR="cmg"
.gitignore ADDED
@@ -0,0 +1,39 @@
+ # Virtual Environment
+ .venv/
+ venv/
+ ENV/
+
+ # VS Code
+ .vscode/*
+ !.vscode/launch.json
+ !.vscode/settings.json
+
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Logs
+ logs/
+ *.log
+
+ # Local results
+ results/
+ temp.jsonl
app.py ADDED
@@ -0,0 +1,209 @@
+ import gradio as gr
+ import pandas as pd
+ import os
+ import uuid
+ import datetime
+ import logging
+ from huggingface_hub import hf_hub_download, upload_file, list_repo_tree
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # Configuration
+ HF_INPUT_DATASET = os.getenv("HF_INPUT_DATASET")
+ HF_INPUT_DATASET_PATH = os.getenv("HF_INPUT_DATASET_PATH")
+ HF_INPUT_DATASET_ID_COLUMN = os.getenv("HF_INPUT_DATASET_ID_COLUMN")
+ HF_INPUT_DATASET_COLUMN_A = os.getenv("HF_INPUT_DATASET_COLUMN_A")
+ HF_INPUT_DATASET_COLUMN_B = os.getenv("HF_INPUT_DATASET_COLUMN_B")
+ HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
+ HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")
+ INSTRUCTIONS = """
+ # Pairwise Model Output Labeling
+
+ Please compare the two model outputs shown below and select which one you think is better.
+ - Choose "A is better" if the left output (Model Output A) is superior
+ - Choose "B is better" if the right output (Model Output B) is superior
+ - Choose "Tie" if they are equally good or bad
+ - Choose "Can't choose" if you cannot make a determination
+ """
+ SAVE_EVERY_N_EXAMPLES = 5
+
+
+ class PairwiseLabeler:
+     def __init__(self):
+         self.current_index = 0
+         self.results = []
+         self.df = self.read_hf_dataset()
+
+     def __len__(self):
+         return len(self.df)
+
+     def read_hf_dataset(self) -> pd.DataFrame:
+         try:
+             local_file = hf_hub_download(repo_id=HF_INPUT_DATASET, repo_type="dataset", filename=HF_INPUT_DATASET_PATH)
+             if local_file.endswith(".json"):
+                 return pd.read_json(local_file)
+             elif local_file.endswith(".jsonl"):
+                 return pd.read_json(local_file, orient="records", lines=True)
+             elif local_file.endswith(".csv"):
+                 return pd.read_csv(local_file)
+             elif local_file.endswith(".parquet"):
+                 return pd.read_parquet(local_file)
+             else:
+                 raise ValueError(f"Unsupported file type: {local_file}")
+         except Exception as e:
+             # Fall back to sample data if loading fails
+             logging.error(f"Couldn't read HF dataset from {HF_INPUT_DATASET_PATH} ({e}). Using sample data instead.")
+             sample_data = {
+                 HF_INPUT_DATASET_ID_COLUMN: [f"sample_{i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
+                 HF_INPUT_DATASET_COLUMN_A: [f"This is sample generation A {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
+                 HF_INPUT_DATASET_COLUMN_B: [f"This is sample generation B {i}" for i in range(SAVE_EVERY_N_EXAMPLES)],
+             }
+             return pd.DataFrame(sample_data)
+
+     def get_current_pair(self):
+         if self.current_index >= len(self.df):
+             return None, None, None
+
+         item = self.df.iloc[self.current_index]
+         item_id = item.get(HF_INPUT_DATASET_ID_COLUMN, f"item_{self.current_index}")
+         left_text = item.get(HF_INPUT_DATASET_COLUMN_A, "")
+         right_text = item.get(HF_INPUT_DATASET_COLUMN_B, "")
+
+         return item_id, left_text, right_text
+
+     def submit_judgment(self, item_id, left_text, right_text, choice):
+         if item_id is None:
+             return item_id, left_text, right_text, self.current_index
+
+         # Record the judgment
+         result = {
+             "item_id": item_id,
+             "generation_a": left_text,
+             "generation_b": right_text,
+             "judgment": choice,
+             "timestamp": datetime.datetime.now().isoformat(),
+             "labeler_id": str(uuid.uuid4())[:8]  # Anonymous ID (regenerated for each judgment)
+         }
+
+         self.results.append(result)
+
+         # Move to the next item
+         self.current_index += 1
+
+         # Save results periodically
+         if len(self.results) % SAVE_EVERY_N_EXAMPLES == 0:
+             self.save_results()
+
+         # Get the next pair
+         next_id, next_left, next_right = self.get_current_pair()
+         return next_id, next_left, next_right, self.current_index
+
+     def save_results(self):
+         if not self.results:
+             return
+
+         try:
+             # Convert results to dataset format
+             results_df = pd.DataFrame(self.results)
+             results_df.to_json("temp.jsonl", orient="records", lines=True)
+
+             # Push to Hugging Face Hub
+             try:
+                 num_files = len([_ for _ in list_repo_tree(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=HF_OUTPUT_DATASET_DIR)])
+             except Exception:
+                 num_files = 0
+             upload_file(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=os.path.join(HF_OUTPUT_DATASET_DIR, f"results_{num_files+1}.jsonl"), path_or_fileobj="temp.jsonl")
+             os.remove("temp.jsonl")
+             logging.info(f"Saved {len(self.results)} results to {HF_OUTPUT_DATASET}")
+             self.results = []
+         except Exception as e:
+             logging.error(f"Error saving results: {e}")
+
+ # Initialize the labeler
+ labeler = PairwiseLabeler()
+
+ # Get the first pair
+ initial_id, initial_left, initial_right = labeler.get_current_pair()
+
+ with gr.Blocks() as app:
+     gr.Markdown(INSTRUCTIONS)
+
+     with gr.Row():
+         with gr.Column():
+             left_output = gr.Textbox(
+                 value=initial_left,
+                 label="Model Output A",
+                 lines=10,
+                 interactive=False
+             )
+
+         with gr.Column():
+             right_output = gr.Textbox(
+                 value=initial_right,
+                 label="Model Output B",
+                 lines=10,
+                 interactive=False
+             )
+
+     item_id = gr.Textbox(value=initial_id, visible=False)
+
+     with gr.Row():
+         left_btn = gr.Button("⬅️ A is better", variant="primary")
+         right_btn = gr.Button("➡️ B is better", variant="primary")
+         tie_btn = gr.Button("🤝 Tie", variant="primary")
+         cant_choose_btn = gr.Button("🤔 Can't choose")
+
+     current_sample_sld = gr.Slider(minimum=0, maximum=len(labeler), step=1,
+                                    value=labeler.current_index,
+                                    interactive=False,
+                                    label='sample_ind',
+                                    info=f"Samples labeled (out of {len(labeler)})",
+                                    show_label=False,
+                                    container=False,
+                                    scale=5)
+
+     def judge_left(item_id, left_text, right_text):
+         return judge("A is better", item_id, left_text, right_text)
+
+     def judge_right(item_id, left_text, right_text):
+         return judge("B is better", item_id, left_text, right_text)
+
+     def judge_tie(item_id, left_text, right_text):
+         return judge("Tie", item_id, left_text, right_text)
+
+     def judge_cant_choose(item_id, left_text, right_text):
+         return judge("Can't choose", item_id, left_text, right_text)
+
+     def judge(choice, item_id, left_text, right_text):
+         new_id, new_left, new_right, new_index = labeler.submit_judgment(
+             item_id, left_text, right_text, choice
+         )
+         return new_id, new_left, new_right, new_index
+
+     left_btn.click(
+         judge_left,
+         inputs=[item_id, left_output, right_output],
+         outputs=[item_id, left_output, right_output, current_sample_sld]
+     )
+
+     right_btn.click(
+         judge_right,
+         inputs=[item_id, left_output, right_output],
+         outputs=[item_id, left_output, right_output, current_sample_sld]
+     )
+
+     tie_btn.click(
+         judge_tie,
+         inputs=[item_id, left_output, right_output],
+         outputs=[item_id, left_output, right_output, current_sample_sld]
+     )
+
+     cant_choose_btn.click(
+         judge_cant_choose,
+         inputs=[item_id, left_output, right_output],
+         outputs=[item_id, left_output, right_output, current_sample_sld]
+     )
+
+ if __name__ == "__main__":
+     app.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio>=4.0.0
+ pandas>=1.3.0
+ datasets>=2.0.0
+ huggingface-hub>=0.12.0
+ python-dotenv>=1.0.0
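
For downstream analysis, the collected judgments can be pulled back out of the output dataset. The snippet below is a minimal sketch (not part of this commit) that assumes the layout produced by app.py's save_results, i.e. results_*.jsonl files under HF_OUTPUT_DATASET_DIR in the HF_OUTPUT_DATASET dataset repo:

```python
# Minimal sketch (not in this commit): read back the collected labels for analysis.
# Assumes, as in app.py, that batches are uploaded as <HF_OUTPUT_DATASET_DIR>/results_*.jsonl.
import os

import pandas as pd
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download, list_repo_tree

load_dotenv()
HF_OUTPUT_DATASET = os.getenv("HF_OUTPUT_DATASET")
HF_OUTPUT_DATASET_DIR = os.getenv("HF_OUTPUT_DATASET_DIR")

# List every results file uploaded so far and download each one locally.
frames = []
for entry in list_repo_tree(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", path_in_repo=HF_OUTPUT_DATASET_DIR):
    if entry.path.endswith(".jsonl"):
        local_file = hf_hub_download(repo_id=HF_OUTPUT_DATASET, repo_type="dataset", filename=entry.path)
        frames.append(pd.read_json(local_file, orient="records", lines=True))

labels = pd.concat(frames, ignore_index=True)
# Per-choice tallies of "A is better" / "B is better" / "Tie" / "Can't choose".
print(labels["judgment"].value_counts())
```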