File size: 2,571 Bytes
4eff8a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import json
import random

import gradio as gr
from difflib import SequenceMatcher

# Path of the JSONL file that is read once at import time; each line is
# parsed as one JSON record.
file_path = "big_gsm8k_output.jsonl"
# Only records whose "similarity_ratio" is strictly greater than this value
# are kept as examples.
similarity_threshold = 0.85
# Index into the filtered examples used for the values shown on first load.
current_index = 0 


def find_similar_chunks(original, output):
    """Split ``output`` into chunks, marking the parts shared with ``original``.

    The two sequences are compared with ``difflib.SequenceMatcher`` and
    ``output`` is cut at the boundaries of the matching blocks it reports.

    Args:
        original: Reference sequence to compare against (presumably a string
            in this file's usage).
        output: Sequence to segment; must support slicing.

    Returns:
        A list of ``(chunk, label)`` tuples that covers ``output`` from left
        to right. ``label`` is ``1`` for a chunk that also occurs in
        ``original`` according to the matcher and ``None`` for a chunk that
        does not. Empty chunks are never emitted, so an empty ``output``
        yields an empty list.
    """
    matcher = SequenceMatcher(None, original, output)
    highlighted_sequence = []
    left = 0  # end of the part of `output` that has already been emitted
    for _, j, n in matcher.get_matching_blocks():
        # Anything between the previous match and this one is unmatched text.
        if left < j:
            highlighted_sequence.append((output[left:j], None))
        # The last block is always a zero-length sentinel positioned at
        # len(output): the gap check above already emitted the unmatched
        # tail, so only real (non-empty) matches are appended here.
        if n:
            highlighted_sequence.append((output[j:j + n], 1))
        left = j + n

    return highlighted_sequence

# Load the examples once at import time, keeping only the records whose
# "similarity_ratio" is strictly above the threshold. Each line is parsed a
# single time, blank lines are skipped instead of crashing the JSON parser,
# and the file is decoded as UTF-8 regardless of the platform default.
with open(file_path, "r", encoding="utf-8") as file:
    examples = [
        record
        for record in (json.loads(line) for line in file if line.strip())
        if record["similarity_ratio"] > similarity_threshold
    ]

def next_example():
    """Pick a random example and return the values used to refresh the UI.

    Returns:
        A five-item list: the example's prompt, its original text, the
        output split into highlighted chunks by ``find_similar_chunks``,
        its similarity ratio, and its seed, in that order.
    """
    record = random.choice(examples)
    source_text = record["original"]
    chunks = find_similar_chunks(source_text, record["output"])
    return [
        record["prompt"],
        source_text,
        chunks,
        record["similarity_ratio"],
        record["seed"],
    ]

# Build the page: a prompt box, the original text next to the highlighted
# output, the similarity ratio and seed, and a button that swaps in another
# randomly chosen example.
with gr.Blocks() as demo:
    # Example shown before the button is pressed for the first time.
    _initial_example = examples[current_index]

    prompt = gr.Textbox(
        label="Prompt",
        interactive=False,
        value=_initial_example["prompt"],
    )
    with gr.Row():
        with gr.Column(scale=4):
            original = gr.Textbox(
                label="Original",
                interactive=False,
                value=_initial_example["original"],
            )
        with gr.Column(scale=4):
            # NOTE(review): color_map is keyed by the string "1" while
            # find_similar_chunks labels matches with the integer 1;
            # confirm that Gradio maps the two as intended.
            output = gr.HighlightedText(
                label="Output",
                color_map={"1": "yellow"},
                value=find_similar_chunks(
                    _initial_example["original"],
                    _initial_example["output"],
                ),
            )

        # NOTE(review): this row is nested inside the row above; confirm the
        # resulting layout is the intended one.
        with gr.Row():
            with gr.Column(scale=1):
                similarity = gr.Textbox(
                    label="Similarity ratio",
                    interactive=False,
                    value=_initial_example["similarity_ratio"],
                )
            with gr.Column(scale=1):
                seed = gr.Textbox(
                    label="Seed",
                    interactive=False,
                    value=_initial_example["seed"],
                )

    next_btn = gr.Button("Another example")

    # next_example returns its values in the same order as this outputs list.
    next_btn.click(
        fn=next_example,
        outputs=[prompt, original, output, similarity, seed],
    )



# Launch the Gradio app; this runs whenever the module is executed or imported.
demo.launch()