# skillmix / app.py
# Dingli Yu
# initial
# 92fb594
# raw
# history blame
# 21.6 kB
from functools import partial
import gradio as gr
import os
import csv
import pandas as pd
import pickle
def _read_csv_rows(path):
    """Return every row of the CSV file at ``path`` as a list of string lists."""
    # newline='' lets the csv module handle line endings itself, which the
    # csv documentation requires for quoted fields containing line breaks.
    with open(path, 'r', newline='') as handle:
        return list(csv.reader(handle))


def _format_points(points):
    """Join point strings with spaces, dropping a trailing ``'.0'`` from each."""
    return ' '.join(p[:-2] if p.endswith('.0') else p for p in points)


def load_results(gen_file, grader_file, exp_name, model_name, grader):
    """Join one generation CSV with its grading CSV into display records.

    Column positions used below follow the header layouts noted by the
    original author:

    * generation file: ``k, skills, topic, system prompt, [INST]_0, [/INST]_0,
      model_input_0, text_0, [INST]_1, [/INST]_1, model_input_1, text_1``
    * grading file: ``k, skills, topic, system prompt, user_0, assistant_0,
      model_input_0, score_0, score_extracted_0, points_0,
      num_sentences_manual_in_student_answer_0, true_sentence_lim_pt_0,
      num_sentences_extracted_eq_num_sentences_model_0``

    Args:
        gen_file: Path of the generation ``records.csv``.
        grader_file: Path of the grading ``records.csv`` for the same run.
        exp_name: Experiment name. For llama models it is split on ``'_'``;
            part 0 is compared against ``'k2'`` and part 2 against
            ``'v8'``/``'v9'`` to skip some experiment/version combinations.
        model_name: Name of the model that produced the generations.
        grader: Name of the grading run; stored as ``grade_run`` and reduced
            to ``'gpt-4'`` or ``'llama-2-70b'`` for the ``grader`` field.

    Returns:
        A list with one dict per data row (the header row is skipped). An
        empty list is returned when the llama filter rejects the experiment
        or when the two files do not have the same number of rows.

    Raises:
        ValueError: If a generation row and its grading row disagree on the
            skills or on the topic.
    """
    if 'llama' in model_name:
        name_parts = exp_name.split('_')
        version = name_parts[2]
        k_tag = name_parts[0]
        if k_tag == 'k2' and version == 'v8':
            return []
        if k_tag != 'k2' and version == 'v9':
            return []

    gen = _read_csv_rows(gen_file)
    grade = _read_csv_rows(grader_file)
    if len(gen) != len(grade):
        # Generation and grading are out of step; skip this pair silently.
        return []

    record = []
    row_pairs = zip(gen[1:], grade[1:])  # [1:] skips the header rows
    for row_no, (gen_row, grade_row) in enumerate(row_pairs, start=1):
        skills = [skill.strip() for skill in gen_row[1].split(',')]
        topic = gen_row[2]
        graded_skills = [skill.strip() for skill in grade_row[1].split(',')]
        # Explicit checks instead of ``assert`` so they survive ``python -O``.
        if skills != graded_skills:
            raise ValueError(
                f"skills mismatch in row {row_no}: {gen_file} vs {grader_file}"
            )
        if topic != grade_row[2]:
            raise ValueError(
                f"topic mismatch in row {row_no}: {gen_file} vs {grader_file}"
            )

        points = grade_row[9].split(',')
        if len(points) < len(skills):
            # Pad with zeros so there is at least one entry per skill.
            points = points + ['0.0'] * (len(skills) - len(points))
        # The last point is overridden by column 11 ('true_sentence_lim_pt_0'
        # in the header layout above) -- presumably the sentence-limit point;
        # TODO confirm.
        points[-1] = grade_row[11]

        # A skill scores zero in the stricter variant when its bare name (the
        # text before any parenthesis) appears verbatim in the last column of
        # the generation row.
        points_no_skill_name = points.copy()
        answer = gen_row[-1]
        for skill_id, skill in enumerate(skills):
            simple_skill = skill.split('(')[0].strip()
            if simple_skill in answer:
                points_no_skill_name[skill_id] = '0.0'

        # Keep only the part of the prompt between the two marker phrases.
        after_intro = gen_row[4].split('examples for the concepts:')[1]
        gen_prompt = after_intro.split('Please start the minimal natural')[0]

        record.append({
            'k': gen_row[0],
            'exp_name': exp_name,
            'model': model_name,
            'grader': 'gpt-4' if 'gpt-4' in grader else 'llama-2-70b',
            'grade_run': grader,
            'skills': '\n\n'.join(skills),
            'topic': topic,
            'topic+skills': '+'.join([topic] + sorted(skills)),
            # Single newlines are doubled so Markdown renders paragraph breaks.
            'gen_prompt': gen_prompt.replace('\n', '\n\n'),
            'gen': gen_row[-3].replace('\n', '\n\n'),
            'grade': grade_row[5].replace('\n', '\n\n'),
            'points': _format_points(points),
            'points_no_skill_name': _format_points(points_no_skill_name),
        })
    return record
def load_all_results(path='final'):
    """Collect every graded run found under ``path`` into one DataFrame.

    The directory layout walked here is
    ``<path>/<exp_name>/<model_name>/records.csv`` for generations and
    ``<path>/<exp_name>/<model_name>/graded/<grader>/records.csv`` for each
    grading run. Plain files directly under ``path`` are ignored, as are
    models without a generation file or a ``graded`` directory, and grading
    runs without a ``records.csv``.

    Args:
        path: Root directory that contains one sub-directory per experiment.

    Returns:
        A ``pandas.DataFrame`` built from the concatenated ``load_results``
        records of every (generation file, grading file) pair.
    """
    collected = []
    for exp_name in os.listdir(path):
        exp_dir = os.path.join(path, exp_name)
        if os.path.isfile(exp_dir):
            continue
        for model_name in os.listdir(exp_dir):
            model_dir = os.path.join(exp_dir, model_name)
            gen_file = os.path.join(model_dir, "records.csv")
            graded_dir = os.path.join(model_dir, 'graded')
            if not (os.path.exists(gen_file) and os.path.isdir(graded_dir)):
                continue
            for grader in os.listdir(graded_dir):
                grader_file = os.path.join(graded_dir, grader, "records.csv")
                if not os.path.exists(grader_file):
                    continue
                collected.extend(
                    load_results(gen_file, grader_file, exp_name, model_name, grader)
                )
    return pd.DataFrame(collected)
# Custom CSS handed to gr.Blocks(css=...). Each id gives one kind of Markdown
# panel its own background colour (all at 20px text):
#   #a -> topic, skills and skill-definition prompt panels
#   #b -> model answer panel
#   #c -> points panel
#   #d -> grader output panel
block_css = """
#a {
background-color: #DEEBF7;
font-size: 20px;
}
#b {
background-color: #E2F0D9;
font-size: 20px;
}
#c {
background-color: #FFF2CC;
font-size: 20px;
}
#d {
background-color: #FBE5D6;
font-size: 20px;
}
"""
from Levenshtein import distance
def best_match(comb, comb_list):
    """Return the entry of ``comb_list`` that is closest to ``comb``.

    Both ``comb`` and the candidates are ``'+'``-joined strings. They are
    split on ``'+'`` and compared with ``distance`` over the resulting lists
    of parts, so the comparison works on whole parts rather than characters.
    An empty ``comb`` selects the first candidate; ties go to the earliest
    candidate in ``comb_list``.
    """
    if comb == '':
        return comb_list[0]

    def parts_distance(candidate):
        return distance(comb.split('+'), candidate.split('+'))

    # min() keeps the first of several equally close candidates.
    return min(comb_list, key=parts_distance)
class Tracker:
    """Resolve dropdown selections against the results table and wire widgets.

    The tracker owns five "slots". ``build_demo`` registers the widgets of
    each view in ``component[slot]``; ``procedure`` then tells it which
    callbacks to attach to a widget so that dependent dropdown choices and
    text panels are recomputed when a selection changes.

    Attributes:
        df: DataFrame with one row per graded generation. ``update`` reads the
            columns ``k``, ``topic+skills``, ``model``, ``exp_name``,
            ``topic``, ``skills``, ``gen_prompt``, ``gen``, ``grader``,
            ``grade_run``, ``points``, ``points_no_skill_name`` and ``grade``.
        value: List of five state dicts (one per slot), all starting as copies
            of the default resolved state.
        component: List of five dicts mapping field name to the registered
            widget, or ``''`` while nothing is registered for that field.
    """

    _NUM_SLOTS = 5
    # Fields chosen through dropdowns; also the callback input order.
    _SELECTION_KEYS = ('k', 'comb', 'model', 'exp_name', 'grader', 'grader_run')
    # Every widget of a slot; also the callback output order.
    _COMPONENT_KEYS = ('k', 'comb', 'model', 'exp_name', 'topic', 'skills',
                       'gen_prompt', 'gen', 'grader', 'grader_run', 'points',
                       'grade')
    # Keys of a state dict: selections, their choice lists and display text.
    _VALUE_KEYS = ('k', 'k_list', 'comb', 'comb_list', 'model', 'model_list',
                   'exp_name', 'exp_name_list', 'topic', 'skills',
                   'gen_prompt', 'gen', 'grader', 'grader_list', 'grader_run',
                   'grader_run_list', 'points', 'grade')

    def __init__(self, df) -> None:
        """Store ``df`` and initialise every slot with the default state."""
        self.df = df
        initial = self.update({name: '' for name in self._VALUE_KEYS})
        self.value = [initial.copy() for _ in range(self._NUM_SLOTS)]
        self.component = [
            {name: '' for name in self._COMPONENT_KEYS}
            for _ in range(self._NUM_SLOTS)
        ]

    @staticmethod
    def _choose(frame, column, wanted, fuzzy=False):
        """Validate ``wanted`` against ``frame[column]`` and narrow ``frame``.

        Returns ``(selection, options, narrowed_frame)``. ``options`` are the
        sorted distinct values of the column. When ``wanted`` is not among
        them, the first option is used, or the closest one according to
        ``best_match`` when ``fuzzy`` is true.
        """
        options = sorted(frame[column].unique())
        if wanted not in options:
            wanted = best_match(wanted, options) if fuzzy else options[0]
        return wanted, options, frame[frame[column] == wanted]

    def update(self, value):
        """Resolve the selections in ``value`` and fill in everything derived.

        The table is narrowed step by step in the order k -> topic+skills ->
        model -> exp_name -> grader -> grader_run. At each step an invalid
        selection is replaced (see ``_choose``) and the matching ``*_list``
        entry receives the available choices. Display strings are taken from
        the first row that remains after narrowing.

        Args:
            value: State dict holding at least the six selection keys. It is
                modified in place.

        Returns:
            The same ``value`` dict.
        """
        wanted = {name: value[name] for name in self._SELECTION_KEYS}
        cdf = self.df

        generation_steps = (('k', 'k'), ('comb', 'topic+skills'),
                            ('model', 'model'), ('exp_name', 'exp_name'))
        for name, column in generation_steps:
            value[name], value[name + '_list'], cdf = self._choose(
                cdf, column, wanted[name], fuzzy=(name == 'comb'))

        # Generation-level text does not depend on the grader selection.
        value['topic'] = "*Topic*: " + cdf['topic'].unique()[0]
        value['skills'] = "*Skills*: \n\n" + cdf['skills'].unique()[0]
        value['gen_prompt'] = ("*Skill Definition and Example*:\n\n"
                               + cdf['gen_prompt'].unique()[0])
        value['gen'] = "*Model Answer*:\n\n" + cdf['gen'].unique()[0]

        grading_steps = (('grader', 'grader'), ('grader_run', 'grade_run'))
        for name, column in grading_steps:
            value[name], value[name + '_list'], cdf = self._choose(
                cdf, column, wanted[name])

        value['points'] = (
            "Points: " + cdf['points'].unique()[0]
            + "\n\n(After deducting points for explicitly mentioning skill names: "
            + cdf['points_no_skill_name'].unique()[0] + ")"
        )
        value['grade'] = cdf['grade'].unique()[0]
        return value

    def procedure(self, c):
        """Describe the callback chain to attach to widget ``c``.

        Args:
            c: A widget previously stored in ``self.component``.

        Returns:
            ``(input_list, output_list, fn_list)``: three parallel lists. Step
            ``i`` of the chain calls ``fn_list[i]`` with ``input_list[i]`` and
            writes its results to ``output_list[i]``. There is one refresh
            step for every slot that shares ``c``.

        Raises:
            AssertionError: If ``c`` is not registered in any slot.
        """
        located = next(
            ((slot, name)
             for slot, widgets in enumerate(self.component)
             for name, widget in widgets.items()
             if widget is c),
            None,
        )
        if located is None:
            raise AssertionError('component is not registered with this tracker')
        idx, key = located

        # Optional mirroring of one field across slots. Each entry is
        # [field name, [slots kept in sync]], e.g. ['k', [0, 1, 3]]. The list
        # is empty, so mirroring is currently disabled.
        binding = []
        sync_list = []
        for bound_key, bound_slots in binding:
            if key == bound_key and idx in bound_slots:
                sync_list = [j for j in bound_slots if j != idx]
        sync_component = [self.component[j][key] for j in sync_list]

        input_list = []
        output_list = []
        fn_list = []

        if sync_list:
            # Freeze the number of mirrored widgets now. The original passed
            # the list itself and later extended it in place, which would have
            # made sync() return one update more than it has outputs.
            mirror_count = len(sync_component)

            def sync(v):
                return [gr.Dropdown.update(value=v) for _ in range(mirror_count)]

            input_list.append(c)
            output_list.append(sync_component)
            fn_list.append(sync)

        def update(k, comb, model, exp_name, grader, grader_run):
            # Start from a blank state, resolve it, and emit one entry per
            # widget in _COMPONENT_KEYS order.
            value = {name: '' for name in self._VALUE_KEYS}
            value.update(k=k, comb=comb, model=model, exp_name=exp_name,
                         grader=grader, grader_run=grader_run)
            value = self.update(value)
            return [
                gr.Dropdown.update(value=value['k'], choices=value['k_list']),
                gr.Dropdown.update(value=value['comb'], choices=value['comb_list']),
                gr.Dropdown.update(value=value['model'], choices=value['model_list']),
                gr.Dropdown.update(value=value['exp_name'], choices=value['exp_name_list']),
                value['topic'],
                value['skills'],
                value['gen_prompt'],
                value['gen'],
                gr.Dropdown.update(value=value['grader'], choices=value['grader_list']),
                gr.Dropdown.update(value=value['grader_run'], choices=value['grader_run_list']),
                value['points'],
                value['grade'],
            ]

        # Every slot holding the very same widget object as one of the anchor
        # slots must be refreshed (a widget may be shared between slots).
        anchor_slots = sync_list + [idx]
        update_list = [
            slot for slot in range(self._NUM_SLOTS)
            if any(self.component[j][key] is self.component[slot][key]
                   for j in anchor_slots)
        ]
        for slot in update_list:
            widgets = self.component[slot]
            input_list.append([widgets[name] for name in self._SELECTION_KEYS])
            output_list.append([widgets[name] for name in self._COMPONENT_KEYS])
            fn_list.append(update)
        return input_list, output_list, fn_list
def build_demo(df):
    """Build the Gradio Blocks app for browsing the results in ``df``.

    Two tabs are created:

    * "Browse Single Generation" shows one generation and one grading
      (tracker slot 0).
    * "Compare Two Generations" shows two columns (tracker slots 1 and 2)
      that share a single ``k`` dropdown and a single ``topic+skills``
      dropdown.

    Every created widget is registered with a ``Tracker``; afterwards each
    distinct widget gets a ``change`` listener that runs the callback chain
    returned by ``Tracker.procedure``.

    Args:
        df: Results table passed to ``Tracker``.

    Returns:
        The assembled ``gr.Blocks`` object (not yet launched).
    """
    tracker = Tracker(df)

    def dropdown(slots, name, label, state):
        # One dropdown may be registered under several slots (shared widget).
        widget = gr.Dropdown(choices=state[name + '_list'], value=state[name], label=label)
        for slot in slots:
            tracker.component[slot][name] = widget
        return widget

    def panel(slot, name, elem_id, state):
        # Markdown panel; elem_id selects the background colour from block_css.
        widget = gr.Markdown(value=state[name], elem_id=elem_id)
        tracker.component[slot][name] = widget
        return widget

    with gr.Blocks(
        title="Skill Mix Browsing",
        theme=gr.themes.Base(text_size=gr.themes.sizes.text_lg),
        css=block_css,
    ) as demo:
        gr.Markdown(
            """
# Skill Mix Browsing
"""
        )

        with gr.Tab('Browse Single Generation'):
            v = tracker.value[0]
            with gr.Row():
                dropdown((0,), 'k', "k", v)
                dropdown((0,), 'comb', "topic+skills", v)
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        dropdown((0,), 'model', "model", v)
                        dropdown((0,), 'exp_name', "exp_name", v)
                    with gr.Row():
                        panel(0, 'topic', 'a', v)
                        panel(0, 'skills', 'a', v)
                    panel(0, 'gen', 'b', v)
                    panel(0, 'gen_prompt', 'a', v)
                with gr.Column():
                    with gr.Row():
                        dropdown((0,), 'grader', "grader", v)
                        dropdown((0,), 'grader_run', "grader_run", v)
                    panel(0, 'points', 'c', v)
                    panel(0, 'grade', 'd', v)

        with gr.Tab('Compare Two Generations'):
            v = tracker.value[1]
            with gr.Row():
                # Both columns are driven by the same k / topic+skills widgets.
                dropdown((1, 2), 'k', "k", v)
                dropdown((1, 2), 'comb', "topic+skills", v)
            with gr.Row():
                for col in range(1, 3):
                    v = tracker.value[col]
                    with gr.Column():
                        with gr.Row():
                            dropdown((col,), 'model', "model", v)
                            dropdown((col,), 'exp_name', "exp_name", v)
                        with gr.Row():
                            panel(col, 'topic', 'a', v)
                            panel(col, 'skills', 'a', v)
                        panel(col, 'gen', 'b', v)
                        with gr.Row():
                            dropdown((col,), 'grader', "grader", v)
                            dropdown((col,), 'grader_run', "grader_run", v)
                        panel(col, 'points', 'c', v)
                        panel(col, 'gen_prompt', 'a', v)
                        panel(col, 'grade', 'd', v)

        # A third tab, 'One Generation Two Grading', is currently disabled. It
        # would occupy tracker slots 3 and 4 (one generation shared by both
        # slots, one grader column per slot); those slots stay unregistered.

        # Attach listeners once per distinct widget. '' marks a field that has
        # no widget; shared widgets appear in several slots, so de-duplicate by
        # identity while keeping creation order.
        seen_ids = set()
        for widgets in tracker.component:
            for c in widgets.values():
                if not (c != '') or id(c) in seen_ids:
                    continue
                seen_ids.add(id(c))
                input_list, output_list, fn_list = tracker.procedure(c)
                if not fn_list:
                    continue
                # First step reacts to the change; later steps are chained
                # with .then() so they run one after another, for any number
                # of steps.
                steps = zip(fn_list, input_list, output_list)
                fn, inputs, outputs = next(steps)
                event = c.change(fn, inputs, outputs)
                for fn, inputs, outputs in steps:
                    event = event.then(fn, inputs, outputs)
    return demo
if __name__ == '__main__':
    # The results table is served from a pickle cache. To rebuild it from the
    # raw experiment folders:
    #   df = load_all_results(path="../../on_released_topics_and_skills")
    #   pickle.dump(df, open('on_released_topics_and_skills.pkl', 'wb'))
    #
    # NOTE(security): unpickling can execute arbitrary code, so only load a
    # cache file that was produced locally by the lines above.
    with open('on_released_topics_and_skills.pkl', 'rb') as cache_file:
        df = pickle.load(cache_file)
    demo = build_demo(df)
    # Use demo.launch(share=True) instead to also get a public share link.
    demo.launch()