import gradio as gr
import os

# PERSISTENT DATA STORAGE: the imports and objects below upload user responses to a Hugging Face dataset

import json
from datetime import datetime
from pathlib import Path
from uuid import uuid4 
from huggingface_hub import CommitScheduler

JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)

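# A UUID in the filename keeps concurrent app instances from overwriting each other's data.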
JSON_DATASET_PATH = JSON_DATASET_DIR / f"train-{uuid4()}.json"

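# CommitScheduler pushes the contents of folder_path to the dataset repo (under
# path_in_repo) from a background thread; the default commit interval is every 5 minutes.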
scheduler = CommitScheduler(
    repo_id="ebrowne/test-data",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="data",
    token = os.getenv("HF_TOKEN")
)

def save_json(score1, score2):
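    # scheduler.lock prevents appending to the file while a background commit is uploading it.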
    with scheduler.lock:
        with JSON_DATASET_PATH.open("a") as f:
            json.dump({"relevance": score1, "novelty": score2, "datetime": datetime.now().isoformat()}, f)
            f.write("\n")
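
# Each call appends one JSON object per line (JSON Lines). With hypothetical scores,
# save_json(4.5, 3.0) would append {"relevance": 4.5, "novelty": 3.0, "datetime": "..."}.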

# READING EXISTING DATA: this is used to read the questions

from datasets import load_dataset
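# Loading a single JSON file produces a DatasetDict with one "train" split.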
qa_data = load_dataset("ebrowne/test-data", data_files = "test.json")
loaded_text = qa_data["train"]["example_string"][0]


# VARIABLES: will eventually be loaded with JSON from a dataset 

question_text = """
    ### Bar Question
    What is the answer to this question?"""
answers_text = ["A", "B", "C", "D"]

# BLOCKS: main user interface

with gr.Blocks() as user_eval:
    # Title text introducing study
    gr.Markdown("""
    # Legal Retriever Evaluation Study
    Thank you for your participation! Here are some basic instructions on how to complete the legal study.
    """)

    # Passages and user evaluations thereof; hidden until the user answers the question below
    with gr.Row(equal_height = False, visible = False) as evals:
        # Passage text
        with gr.Column(scale = 2) as passages:
            passage_display = gr.Markdown("""
            ### Question
            """ + loaded_text + 
            """
            ### Relevant Passages
                - Dataset 1
                - Dataset 2
                - More text
                - More text
                - More text
                - More text
            ### Auto-Generated Summary
            This is a summary of the above legal passages, which imitates how a RAG system might \
            incorporate retrieved data into its context to give a better response to a given query.
            """)

        # Scoring box
        with gr.Column(scale = 1) as scores:
            desc_1 = gr.Markdown("How **relevant** are these passages to our query?")
            eval_1 = gr.Slider(1, 5, step = 0.5)
            desc_2 = gr.Markdown("How **novel** are these passages compared to the previous passages?")
            eval_2 = gr.Slider(1, 5, step = 0.5)
            btn = gr.Button("Next")

            # "Next" logs both slider scores through save_json
            btn.click(fn = save_json, inputs = [eval_1, eval_2])

    # Question and answering dynamics
    with gr.Row() as question:
        with gr.Column():
            gr.Markdown(question_text)
            a = gr.Button(answers_text[0])
            b = gr.Button(answers_text[1])
            c = gr.Button(answers_text[2])
            d = gr.Button(answers_text[3])

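            # Returning a dict keyed by components updates only the listed properties:
            # hide the question row and reveal the evaluation row.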
            def answer():
                return {
                    question: gr.Row(visible = False),
                    evals: gr.Row(visible = True)
                }
            
            a.click(fn = answer, outputs = [question, evals])
            b.click(fn = answer, outputs = [question, evals])
            c.click(fn = answer, outputs = [question, evals])
            d.click(fn = answer, outputs = [question, evals])

# Starts on question, switches to evaluation after the user answers
user_eval.launch()