|
import gradio as gr |
|
import json |
|
import hashlib |
|
import sqlite3 |
|
import os |
|
|
|
class JSONLViewer:
    """Browse the records of a JSONL file and keep a review status for each.

    The whole data file is parsed into memory on construction. Statuses live
    in a SQLite table ``states`` keyed by an MD5 digest of the record's
    key-sorted JSON text, so a status is tied to the record's content rather
    than to its position in the file.
    """

    def __init__(self, data_file_path, db_path):
        """Load the data file and make sure the status table exists.

        Args:
            data_file_path: Path to a UTF-8 JSONL file (one JSON value per line).
            db_path: Path of the SQLite database that stores the statuses.

        Raises:
            OSError: If the data file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            sqlite3.Error: If the database cannot be opened or initialised.
        """
        self.data_file_path = data_file_path
        self.db_path = db_path
        self.current_index = 0
        self.data = []
        self.load_data()
        self.init_db()

    def load_data(self):
        """Parse every non-blank line of the data file into ``self.data``."""
        with open(self.data_file_path, 'r', encoding='utf-8') as file:
            # Blank or whitespace-only lines (e.g. a trailing empty line) are
            # not records; feeding them to json.loads would abort the load.
            self.data = [json.loads(line) for line in file if line.strip()]

    def init_db(self):
        """Create the ``states`` table if it does not exist yet."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute('''CREATE TABLE IF NOT EXISTS states
                            (record_id TEXT PRIMARY KEY, status TEXT)''')
            conn.commit()
        finally:
            # Close even when the statement fails so the handle is not leaked.
            conn.close()

    def get_current_record(self):
        """Return the record at ``current_index``, or None if out of range."""
        if 0 <= self.current_index < len(self.data):
            return self.data[self.current_index]
        return None

    def get_record_id(self, record):
        """Return a hex MD5 digest of the record's key-sorted JSON text.

        The digest is used purely as a lookup key for the status table.
        """
        record_str = json.dumps(record, sort_keys=True)
        return hashlib.md5(record_str.encode()).hexdigest()

    def get_status(self, record_id):
        """Return the stored status for ``record_id``, or "" if none exists."""
        conn = sqlite3.connect(self.db_path)
        try:
            row = conn.execute(
                "SELECT status FROM states WHERE record_id = ?", (record_id,)
            ).fetchone()
        finally:
            conn.close()
        return row[0] if row else ""

    def set_status(self, record_id, status):
        """Insert or overwrite the status stored for ``record_id``."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute(
                "INSERT OR REPLACE INTO states (record_id, status) VALUES (?, ?)",
                (record_id, status),
            )
            conn.commit()
        finally:
            conn.close()

    def move_prev(self):
        """Step back one record (stopping at the first) and return it."""
        if self.current_index > 0:
            self.current_index -= 1
        return self.get_current_record()

    def move_next(self):
        """Step forward one record (stopping at the last) and return it."""
        if self.current_index < len(self.data) - 1:
            self.current_index += 1
        return self.get_current_record()
|
|
|
# Module-level viewer used by the UI callbacks in this file. Constructing it
# reads the data file and creates the status table if it is missing.
viewer = JSONLViewer(data_file_path='plik.jsonl', db_path='states.db')
|
|
|
def update_ui(record):
    """Turn a record into the values shown by the six UI components.

    Args:
        record: A parsed JSONL record (a dict), or a falsy value when there
            is nothing to display.

    Returns:
        A 6-tuple ``(instruction, chosen, rejected, chosen_score,
        rejected_score, status)``. Missing text fields are ``""``; missing
        scores are ``None`` because they feed numeric components, for which
        an empty string is not a valid number.
    """
    if not record:
        return "", "", "", None, None, ""

    # 'conversations' is treated like the other optional keys: a record
    # without it shows an empty instruction instead of raising KeyError.
    conversations = record.get('conversations')
    instruction = conversations[0]['value'] if conversations else ""
    chosen = record['chosen']['value'] if 'chosen' in record else ""
    rejected = record['rejected']['value'] if 'rejected' in record else ""
    chosen_score = record.get('chosen_score')
    rejected_score = record.get('rejected_score')

    status = viewer.get_status(viewer.get_record_id(record))
    return instruction, chosen, rejected, chosen_score, rejected_score, status
|
|
|
def on_prev():
    """Step the viewer back one record and return the refreshed UI values."""
    return update_ui(viewer.move_prev())
|
|
|
def on_next():
    """Step the viewer forward one record and return the refreshed UI values."""
    return update_ui(viewer.move_next())
|
|
|
def on_ok():
    """Store status "ok" for the current record and return the UI values."""
    current = viewer.get_current_record()
    if not current:
        # Nothing to mark; update_ui yields the empty display for this case.
        return update_ui(current)
    viewer.set_status(viewer.get_record_id(current), "ok")
    return update_ui(current)
|
|
|
def on_rejected():
    """Store status "rejected" for the current record and return the UI values."""
    current = viewer.get_current_record()
    if not current:
        # Nothing to mark; update_ui yields the empty display for this case.
        return update_ui(current)
    viewer.set_status(viewer.get_record_id(current), "rejected")
    return update_ui(current)
|
|
|
# Build the review UI: six display fields, a row of four buttons, and the
# wiring between them.
with gr.Blocks(css="button.ok-button { background-color: #4CAF50 !important; }") as demo:
    instruction = gr.Textbox(label="INSTRUCTION", lines=2, max_lines=2)
    chosen = gr.Textbox(label="CHOSEN", lines=12, max_lines=12)
    rejected = gr.Textbox(label="REJECTED", lines=12, max_lines=12)
    chosen_score = gr.Number(label="CHOSEN SCORE")
    rejected_score = gr.Number(label="REJECTED SCORE")
    status = gr.Textbox(label="STATUS")

    with gr.Row():
        prev_btn = gr.Button("PREV")
        next_btn = gr.Button("NEXT")
        ok_btn = gr.Button("OK", elem_classes="ok-button")
        rejected_btn = gr.Button("REJECTED", variant="stop")

    # Every handler returns one value per display field, in this order.
    display_fields = [instruction, chosen, rejected, chosen_score, rejected_score, status]
    for button, handler in (
        (prev_btn, on_prev),
        (next_btn, on_next),
        (ok_btn, on_ok),
        (rejected_btn, on_rejected),
    ):
        button.click(handler, outputs=display_fields)

    # Pre-fill the fields with the first record so the page does not start
    # empty; with no records the components keep their defaults.
    initial_record = viewer.get_current_record()
    if initial_record:
        for field, initial_value in zip(display_fields, update_ui(initial_record)):
            field.value = initial_value

if __name__ == "__main__":
    demo.launch()