leonard-dls
Add application file
4eff8a8
raw
history blame
2.57 kB
import json
import random
import gradio as gr
from difflib import SequenceMatcher
# JSON Lines file the examples are read from (one JSON object per line).
file_path = "big_gsm8k_output.jsonl"
# Only records whose "similarity_ratio" is strictly greater than this are kept.
similarity_threshold = 0.85
# Index into the loaded examples used to pre-fill the UI widgets at start-up.
current_index = 0
def find_similar_chunks(original, output):
    """Split ``output`` into chunks, flagging the parts shared with ``original``.

    The two sequences are aligned with ``difflib.SequenceMatcher``. The result
    is a list of ``(text, label)`` tuples which, concatenated in order,
    reproduce ``output``:

    * ``label`` is ``1`` for a chunk that matches a block of ``original``;
    * ``label`` is ``None`` for a chunk that has no counterpart.

    Empty chunks are never emitted, so an empty ``output`` yields ``[]``.
    """
    matcher = SequenceMatcher(None, original, output)
    highlighted_sequence = []
    left = 0  # end of the part of ``output`` that has already been emitted
    for _, j, n in matcher.get_matching_blocks():
        # Text between the previous match and this one is unmatched.
        if left < j:
            highlighted_sequence.append((output[left:j], None))
        # get_matching_blocks() always ends with a zero-length sentinel at
        # (len(original), len(output), 0). It lets the branch above flush any
        # unmatched tail, but must not be emitted as an empty highlight.
        if n:
            highlighted_sequence.append((output[j:j + n], 1))
        left = j + n
    return highlighted_sequence
# Load the examples shown in the UI: every JSON Lines record whose
# "similarity_ratio" is strictly above similarity_threshold.
# Each line is parsed once, blank lines are skipped instead of raising
# JSONDecodeError, and the file is read as UTF-8 regardless of the locale.
with open(file_path, "r", encoding="utf-8") as file:
    examples = [
        record
        for record in (json.loads(line) for line in file if line.strip())
        if record["similarity_ratio"] > similarity_threshold
    ]
def next_example():
    """Draw a random loaded example and return the values for the UI widgets.

    Returns a list in the order: prompt, original text, highlighted output
    chunks (as produced by ``find_similar_chunks``), similarity ratio, seed.
    """
    picked = random.choice(examples)
    chunks = find_similar_chunks(picked["original"], picked["output"])

    fields = [picked[key] for key in ("prompt", "original")]
    fields.append(chunks)
    fields.extend(picked[key] for key in ("similarity_ratio", "seed"))
    return fields
# Build the viewer UI. Every widget is pre-filled from the example at
# current_index; the button then swaps in a randomly chosen example.
with gr.Blocks() as demo:
    prompt = gr.Textbox(
        label="Prompt",
        interactive=False,
        value=examples[current_index]["prompt"],
    )
    # Original text and highlighted model output, side by side.
    with gr.Row():
        with gr.Column(scale=4):
            original = gr.Textbox(
                label="Original",
                interactive=False,
                value=examples[current_index]["original"],
            )
        with gr.Column(scale=4):
            # NOTE(review): find_similar_chunks labels matched chunks with the
            # integer 1, while color_map is keyed by the string "1" — confirm
            # the yellow highlight is actually applied.
            output = gr.HighlightedText(
                label="Output",
                color_map={"1": "yellow"},
                value=find_similar_chunks(examples[current_index]["original"],
                                          examples[current_index]["output"]),
            )
    # Metadata of the displayed example.
    with gr.Row():
        with gr.Column(scale=1):
            similarity = gr.Textbox(
                label="Similarity ratio",
                interactive=False,
                value=examples[current_index]["similarity_ratio"],
            )
        with gr.Column(scale=1):
            seed = gr.Textbox(
                label="Seed",
                interactive=False,
                value=examples[current_index]["seed"],
            )
    # Button label typo fixed ("Anoter" -> "Another").
    next_btn = gr.Button("Another example")
    # The order of `outputs` must match the list returned by next_example().
    next_btn.click(fn=next_example,
                   outputs=[prompt, original, output, similarity, seed])

demo.launch()