Spaces:
Running
Running
File size: 2,445 Bytes
c173eef 92274b2 6eb17e2 c173eef 6eb17e2 c173eef 911c9b4 c173eef 6713f3f c173eef 6713f3f c173eef 911c9b4 6713f3f c173eef 6713f3f 92274b2 6713f3f 911c9b4 6713f3f 911c9b4 c173eef 911c9b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import os
import streamlit as st
import json
import tarfile
st.set_page_config(layout="wide")
# Disable the scroll bar on the main container.
st.html("<style> .main {overflow: hidden} </style>")

# Evaluation results are read from a directory that sits next to this script,
# laid out as eval-results/<metric>/<n-shot>/<checkpoint>/<file>.tar.gz
PARENT_DIR: str = os.path.dirname(os.path.abspath(__file__))
EVAL_DIR: str = os.path.join(PARENT_DIR, "eval-results")

st.title("K2 Evaluation Gallery")
st.markdown("""The K2 gallery allows one to browse the output of various evaluations on intermediate K2 checkpoints, which provides an intuitive understanding on how the model develops and improves over time.""")

with st.sidebar:
    # Logos are rendered through raw HTML so both images can share one line.
    html = "<img src='https://www.llm360.ai/images/logo-highres.png' width='100' /><img src='https://huggingface.co/spaces/LLM360/k2-eval-gallery/raw/main/k2-logo.svg' width='100' />"
    st.markdown(html, unsafe_allow_html=True)

    # os.listdir() returns entries in arbitrary order; sort them so that the
    # option order (and therefore the default selection) is stable across runs.
    metric = st.radio(
        "Choose a metric",
        options=sorted(os.listdir(EVAL_DIR)),
        help="type of evaluation benchmark task",
    )
    n_shot = st.radio(
        "Select an n-shot number",
        sorted(os.listdir(os.path.join(EVAL_DIR, metric))),
        help="number of examples included in few-shot prompting",
    )

# Two side-by-side columns, one per checkpoint being compared.
col1, col2 = st.columns(2)
def render_column(col_label):
    """Render one checkpoint-browsing column of the gallery.

    Draws a checkpoint slider, a file selector and a JSON viewer for the
    module-level ``metric`` / ``n_shot`` selections, reading archives from
    ``EVAL_DIR/<metric>/<n_shot>/<checkpoint>/<file>.tar.gz``.

    Args:
        col_label: Short label for the column (e.g. ``'A'``). It is shown in
            the header and used as a prefix for the widget keys so the same
            widgets can be rendered once per column without key clashes.

    Returns:
        None. Output is written to the current Streamlit container. When
        there is nothing to show (no checkpoints, no archives, or an archive
        without a regular file) a warning is displayed instead.
    """
    suffix, result_file = ".tar.gz", "results.json"
    shot_dir = os.path.join(EVAL_DIR, metric, n_shot)

    st.header(f"Checkpoint {col_label}")

    checkpoints = sorted(os.listdir(shot_dir))
    if not checkpoints:
        # A select_slider needs at least one option to choose from.
        st.warning("No checkpoints found for this metric / n-shot setting.")
        return
    ckpt = st.select_slider(
        'Select a checkpoint',
        checkpoints,
        key=col_label + '1',
        help="checkpoint index from 3 to 360",
    )
    st.write(f'Viewing Evaluation Results for Checkpoint: `{ckpt}`')

    ckpt_dir = os.path.join(shot_dir, ckpt)
    # Only consider real archives; stripping the suffix length from an
    # unrelated file name would yield a bogus entry that cannot be opened.
    file_list = sorted(
        f_name[:-len(suffix)]
        for f_name in os.listdir(ckpt_dir)
        if f_name.endswith(suffix)
    )
    if not file_list:
        st.warning("No evaluation archives found for this checkpoint.")
        return
    # Keep the aggregate results file at the end of the list.
    if result_file in file_list:
        file_list.remove(result_file)
        file_list.append(result_file)

    file = st.selectbox(
        "Select a file",
        file_list,
        key=col_label + '2',
        help="a list of raw output files from evaluation results",
    )
    archive_path = os.path.join(ckpt_dir, file + suffix)

    with tarfile.open(archive_path, "r:gz") as tar:
        # Take the first regular file: an empty archive has no members and
        # extractfile() returns None for directories and other special members.
        member = next((m for m in tar if m.isfile()), None)
        if member is None:
            st.warning("The selected archive does not contain a readable file.")
            return
        # The context manager closes the extracted stream even if parsing fails.
        with tar.extractfile(member) as f:
            eval_json = json.load(f)

    if not isinstance(eval_json, list):
        st.json(eval_json)
    elif len(eval_json) > 1:
        doc_id = st.slider(
            "Select a document id",
            0,
            len(eval_json) - 1,
            0,
            1,
            key=col_label + '3',
            help="index of a specific question/task in current file",
        )
        st.json(eval_json[doc_id])
    elif eval_json:
        # Exactly one document: there is nothing to choose between.
        st.json(eval_json[0])
    else:
        # An empty list has no index 0 to display.
        st.info("This file contains no documents.")
# Render the same browsing widgets in each column, labelled 'A' and 'B'.
for column, label in ((col1, 'A'), (col2, 'B')):
    with column:
        render_column(label)
|