Spaces:
Sleeping
Sleeping
File size: 2,954 Bytes
143b62d 4fb58cc 87bb867 143b62d d3d48e1 143b62d 87bb867 4fb58cc 87bb867 e302f12 87bb867 e302f12 87bb867 e302f12 87bb867 143b62d dea4ce7 143b62d 87bb867 143b62d aa733b6 87bb867 aa733b6 143b62d af1a6de 143b62d f253a0d 143b62d f253a0d 143b62d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import html
import random

from dotenv import load_dotenv
import gradio as gr
import numpy as np

from utils.model import Model
from utils.data import dataset
from utils.metric import metric_rouge_score
from pages.summarization_playground import model, generate_answer
# Load variables from a .env file into the process environment (python-dotenv).
load_dotenv()
def display_results(response_list):
    """Render evaluated responses as an HTML report.

    Args:
        response_list: List of dicts, each holding the keys ``'dialogue'``,
            ``'summary'``, ``'response'`` and ``'metric_score'`` (itself a
            dict with a numeric ``'rouge_score'`` entry).

    Returns:
        str: An ``<h2>`` header with the mean rouge score followed by one
        collapsible ``<details>`` element per response. When
        ``response_list`` is empty, only a header reading ``N/A`` is
        returned, because a mean over zero items is undefined.
    """
    if not response_list:
        # np.mean([]) would emit a RuntimeWarning and render "nan".
        return "<h2>Overall Score: N/A</h2>"

    overall_score = np.mean([r['metric_score']['rouge_score'] for r in response_list])
    parts = [f"<h2>Overall Score: {overall_score:.2f}</h2>"]

    for i, item in enumerate(response_list, 1):
        # The texts come from the dataset and from model output; escape them
        # so any markup they contain is displayed literally rather than
        # being injected into the page.
        dialogue = html.escape(str(item['dialogue']))
        summary = html.escape(str(item['summary']))
        response = html.escape(str(item['response']))
        rouge_score = item['metric_score']['rouge_score']

        parts.append(f"""
        <details>
            <summary>Response {i} (Rouge Score: {rouge_score:.2f})</summary>
            <div style="display: flex; justify-content: space-between;">
                <div style="width: 30%;">
                    <h3>Dialogue</h3>
                    {dialogue}
                </div>
                <div style="width: 30%;">
                    <h3>Summary</h3>
                    {summary}
                </div>
                <div style="width: 30%;">
                    <h3>Response</h3>
                    {response}
                </div>
            </div>
        </details>
        """)

    # Join once instead of growing a string with repeated +=.
    return "".join(parts)
def process(seed, model_selection, prompt, num=10):
    """Sample dataset entries, generate a response for each, and render the scores.

    Args:
        seed: Value passed to ``random.seed`` so the sampling is repeatable.
        model_selection: Forwarded unchanged to ``generate_answer``.
        prompt: Forwarded unchanged to ``generate_answer``.
        num: How many entries to draw from ``dataset`` (default 10).

    Returns:
        The HTML string produced by ``display_results`` for the collected
        entries.
    """
    # Seeds the module-level generator; random.choices draws with replacement,
    # so the same entry may appear more than once.
    random.seed(seed)
    sampled = random.choices(dataset, k=num)

    def _evaluate(example):
        # Generate a response for one entry and score it against the reference.
        source_dialogue = example['dialogue']
        reference = example['summary']
        generated = generate_answer(source_dialogue, model_selection, prompt)
        score = metric_rouge_score(generated, reference)
        return {
            'dialogue': source_dialogue,
            'summary': reference,
            'response': generated,
            'metric_score': {'rouge_score': score},
        }

    evaluated = [_evaluate(example) for example in sampled]
    return display_results(evaluated)
def create_batch_evaluation_interface():
    """Build the Gradio Blocks UI for batch evaluation.

    The page contains a seed input, a model dropdown, a prompt-template
    textbox, a submit button and an HTML results area. Clicking the button
    calls ``process`` with the seed, selected model and prompt template, and
    writes its return value into the results area.

    Returns:
        The ``gr.Blocks`` instance holding the interface (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("## Here are evaluation setups. It will randomly sample 10 data points to generate and evaluate. Show results once finished.")

        # NOTE(review): the source this was recovered from had lost its
        # indentation; the Row is assumed to hold only the seed and model
        # selectors. Confirm the intended layout.
        with gr.Row():
            # precision=0 makes the Number component yield an integer seed.
            seed = gr.Number(value=8, info="pick your favorite random seed", precision=0)
            model_dropdown = gr.Dropdown(
                choices=Model.__model_list__,
                label="Choose a model",
                value=Model.__model_list__[0],
            )

        template_text = gr.Textbox(
            value="Summarize the following dialogue",
            label='Input Prompting Template',
            lines=8,
            placeholder='Input your prompts',
        )
        submit_button = gr.Button("✨ Submit ✨")
        output = gr.HTML(label="Results")

        # `num` is not wired to any input, so process() uses its default.
        submit_button.click(
            process,
            inputs=[seed, model_dropdown, template_text],
            outputs=output,
        )

    return demo
if __name__ == "__main__":
    # Build the interface and start the Gradio server only when this file is
    # executed directly, not when it is imported.
    demo = create_batch_evaluation_interface()
    demo.launch()