Spaces:
Running
Running
Suqi Sun
committed on
Commit
·
c173eef
1
Parent(s):
2e2474f
Upload results for 3 metrics
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +41 -0
- eval-results/humaneval/0/ckpt_003/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_003/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_003/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_006/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_006/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_006/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_009/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_009/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_009/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_012/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_012/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_012/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_015/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_015/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_015/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_018/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_018/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_018/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_021/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_021/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_021/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_024/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_024/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_024/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_027/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_027/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_027/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_030/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_030/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_030/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_033/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_033/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_033/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_036/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_036/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_036/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_039/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_039/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_039/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_042/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_042/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_042/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_045/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_045/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_045/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_048/humaneval.jsonl.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_048/results.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_048/results_true.json.tar.gz +0 -0
- eval-results/humaneval/0/ckpt_051/humaneval.jsonl.tar.gz +0 -0
app.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
import json
|
4 |
+
import tarfile
|
5 |
+
|
6 |
+
# Directory containing this script; evaluation archives are stored beneath it
# as eval-results/<metric>/<n_shot>/<checkpoint>/<file>.tar.gz.
PARENT_PATH: str = os.path.dirname(os.path.abspath(__file__))
EVAL_DIR: str = os.path.join(PARENT_PATH, "eval-results")


def list_entries(*parts):
    """Return the sorted, non-hidden entry names of the directory ``os.path.join(*parts)``.

    ``os.listdir`` returns names in arbitrary order and includes dot-files
    (e.g. ``.DS_Store``); sorting and filtering keeps the widgets stable and
    free of entries that are not evaluation results.
    """
    directory = os.path.join(*parts)
    return sorted(
        name for name in os.listdir(directory) if not name.startswith(".")
    )


def load_archive_payload(path):
    """Parse the first regular file stored in the gzip-compressed tar at *path*.

    The content is first parsed as a single JSON document. If that fails it is
    parsed as JSON Lines (one JSON value per non-blank line) and returned as a
    list, so ``*.jsonl`` archives can be displayed as well.

    Raises:
        ValueError: if the archive holds no regular file, or the content is
            neither valid JSON nor valid JSON Lines
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        tarfile.TarError, OSError: if the archive cannot be opened or read.
    """
    with tarfile.open(path, "r:gz") as archive:
        # Skip directory/link members: extractfile() returns None for those.
        member = next((m for m in archive if m.isfile()), None)
        if member is None:
            raise ValueError(f"No regular file found in archive: {path}")
        text = archive.extractfile(member).read().decode("utf-8")

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Split on "\n" only: str.splitlines() would also break on Unicode
        # separators that may legally appear unescaped inside JSON strings.
        return [json.loads(line) for line in text.split("\n") if line.strip()]


def render_checkpoint_column(label, metric, n_shot):
    """Render the checkpoint/file pickers and the selected result for one column.

    *label* ("A" or "B") is used in the header and to build the unique widget
    keys ("A1", "A2", "B1", "B2").
    """
    st.header(f"Checkpoint {label}")
    ckpt = st.selectbox(
        "Select a checkpoint",
        list_entries(EVAL_DIR, metric, n_shot),
        key=f"{label}1",
    )
    if ckpt is None:
        # selectbox yields None when there are no options to choose from.
        st.warning("No checkpoints available for this selection.")
        return

    st.write(f"Viewing Evaluation Results for Checkpoint: `{ckpt}`")
    file = st.selectbox(
        "Select a file",
        list_entries(EVAL_DIR, metric, n_shot, ckpt),
        key=f"{label}2",
    )
    if file is None:
        st.warning("No result files available for this checkpoint.")
        return

    try:
        payload = load_archive_payload(
            os.path.join(EVAL_DIR, metric, n_shot, ckpt, file)
        )
    except (OSError, tarfile.TarError, ValueError) as err:
        # Report the problem in this column instead of crashing the whole page.
        st.error(f"Could not load `{file}`: {err}")
    else:
        st.json(payload)


def main():
    """Build the page: intro text, sidebar selectors, and two comparison columns."""
    st.set_page_config(layout="wide")

    st.title("K2 Evaluation Gallery")
    st.markdown("""The K2 gallery allows one to browse the output of various evaluations on intermediate K2 checkpoints, which provides an intuitive understanding on how the model develops and improves over time.""")

    with st.sidebar:
        # Hub files must be fetched through /resolve/; the /blob/ URL serves
        # the HTML file viewer, which cannot be used as an <img> source.
        html = (
            "<img src='https://www.llm360.ai/images/logo-highres.png' width='100' />"
            "<img src='https://huggingface.co/spaces/LLM360/k2-gallery/resolve/main/k2-logo.svg' width='100' />"
        )
        st.markdown(html, unsafe_allow_html=True)

        metric = st.radio("Choose a metric", options=list_entries(EVAL_DIR))
        if metric is None:
            st.warning("No evaluation results found.")
            st.stop()

        n_shot = st.radio(
            "Select an n-shot number", list_entries(EVAL_DIR, metric)
        )
        if n_shot is None:
            st.warning("No n-shot settings found for this metric.")
            st.stop()

    col1, col2 = st.columns(2)
    with col1:
        render_checkpoint_column("A", metric, n_shot)
    with col2:
        render_checkpoint_column("B", metric, n_shot)


# `streamlit run` executes the script as the `__main__` module, so the page is
# still built on every rerun; importing the module only defines the helpers.
if __name__ == "__main__":
    main()
|
eval-results/humaneval/0/ckpt_003/humaneval.jsonl.tar.gz
ADDED
Binary file (102 kB). View file
|
|
eval-results/humaneval/0/ckpt_003/results.json.tar.gz
ADDED
Binary file (2.71 kB). View file
|
|
eval-results/humaneval/0/ckpt_003/results_true.json.tar.gz
ADDED
Binary file (739 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_006/humaneval.jsonl.tar.gz
ADDED
Binary file (93.8 kB). View file
|
|
eval-results/humaneval/0/ckpt_006/results.json.tar.gz
ADDED
Binary file (2.71 kB). View file
|
|
eval-results/humaneval/0/ckpt_006/results_true.json.tar.gz
ADDED
Binary file (738 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_009/humaneval.jsonl.tar.gz
ADDED
Binary file (92 kB). View file
|
|
eval-results/humaneval/0/ckpt_009/results.json.tar.gz
ADDED
Binary file (2.72 kB). View file
|
|
eval-results/humaneval/0/ckpt_009/results_true.json.tar.gz
ADDED
Binary file (738 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_012/humaneval.jsonl.tar.gz
ADDED
Binary file (86.6 kB). View file
|
|
eval-results/humaneval/0/ckpt_012/results.json.tar.gz
ADDED
Binary file (2.72 kB). View file
|
|
eval-results/humaneval/0/ckpt_012/results_true.json.tar.gz
ADDED
Binary file (738 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_015/humaneval.jsonl.tar.gz
ADDED
Binary file (91.9 kB). View file
|
|
eval-results/humaneval/0/ckpt_015/results.json.tar.gz
ADDED
Binary file (2.71 kB). View file
|
|
eval-results/humaneval/0/ckpt_015/results_true.json.tar.gz
ADDED
Binary file (737 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_018/humaneval.jsonl.tar.gz
ADDED
Binary file (87.1 kB). View file
|
|
eval-results/humaneval/0/ckpt_018/results.json.tar.gz
ADDED
Binary file (2.74 kB). View file
|
|
eval-results/humaneval/0/ckpt_018/results_true.json.tar.gz
ADDED
Binary file (741 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_021/humaneval.jsonl.tar.gz
ADDED
Binary file (85.2 kB). View file
|
|
eval-results/humaneval/0/ckpt_021/results.json.tar.gz
ADDED
Binary file (2.72 kB). View file
|
|
eval-results/humaneval/0/ckpt_021/results_true.json.tar.gz
ADDED
Binary file (741 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_024/humaneval.jsonl.tar.gz
ADDED
Binary file (95.5 kB). View file
|
|
eval-results/humaneval/0/ckpt_024/results.json.tar.gz
ADDED
Binary file (2.71 kB). View file
|
|
eval-results/humaneval/0/ckpt_024/results_true.json.tar.gz
ADDED
Binary file (738 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_027/humaneval.jsonl.tar.gz
ADDED
Binary file (87.9 kB). View file
|
|
eval-results/humaneval/0/ckpt_027/results.json.tar.gz
ADDED
Binary file (2.71 kB). View file
|
|
eval-results/humaneval/0/ckpt_027/results_true.json.tar.gz
ADDED
Binary file (733 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_030/humaneval.jsonl.tar.gz
ADDED
Binary file (82.1 kB). View file
|
|
eval-results/humaneval/0/ckpt_030/results.json.tar.gz
ADDED
Binary file (2.73 kB). View file
|
|
eval-results/humaneval/0/ckpt_030/results_true.json.tar.gz
ADDED
Binary file (740 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_033/humaneval.jsonl.tar.gz
ADDED
Binary file (84.4 kB). View file
|
|
eval-results/humaneval/0/ckpt_033/results.json.tar.gz
ADDED
Binary file (2.72 kB). View file
|
|
eval-results/humaneval/0/ckpt_033/results_true.json.tar.gz
ADDED
Binary file (736 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_036/humaneval.jsonl.tar.gz
ADDED
Binary file (77.1 kB). View file
|
|
eval-results/humaneval/0/ckpt_036/results.json.tar.gz
ADDED
Binary file (2.71 kB). View file
|
|
eval-results/humaneval/0/ckpt_036/results_true.json.tar.gz
ADDED
Binary file (741 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_039/humaneval.jsonl.tar.gz
ADDED
Binary file (88.5 kB). View file
|
|
eval-results/humaneval/0/ckpt_039/results.json.tar.gz
ADDED
Binary file (2.71 kB). View file
|
|
eval-results/humaneval/0/ckpt_039/results_true.json.tar.gz
ADDED
Binary file (738 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_042/humaneval.jsonl.tar.gz
ADDED
Binary file (84.6 kB). View file
|
|
eval-results/humaneval/0/ckpt_042/results.json.tar.gz
ADDED
Binary file (2.73 kB). View file
|
|
eval-results/humaneval/0/ckpt_042/results_true.json.tar.gz
ADDED
Binary file (737 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_045/humaneval.jsonl.tar.gz
ADDED
Binary file (94.4 kB). View file
|
|
eval-results/humaneval/0/ckpt_045/results.json.tar.gz
ADDED
Binary file (2.71 kB). View file
|
|
eval-results/humaneval/0/ckpt_045/results_true.json.tar.gz
ADDED
Binary file (737 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_048/humaneval.jsonl.tar.gz
ADDED
Binary file (86.5 kB). View file
|
|
eval-results/humaneval/0/ckpt_048/results.json.tar.gz
ADDED
Binary file (2.71 kB). View file
|
|
eval-results/humaneval/0/ckpt_048/results_true.json.tar.gz
ADDED
Binary file (734 Bytes). View file
|
|
eval-results/humaneval/0/ckpt_051/humaneval.jsonl.tar.gz
ADDED
Binary file (85.5 kB). View file
|
|