Suqi Sun committed on
Commit
c173eef
·
1 Parent(s): 2e2474f

Upload results for 3 metrics

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. app.py +41 -0
  2. eval-results/humaneval/0/ckpt_003/humaneval.jsonl.tar.gz +0 -0
  3. eval-results/humaneval/0/ckpt_003/results.json.tar.gz +0 -0
  4. eval-results/humaneval/0/ckpt_003/results_true.json.tar.gz +0 -0
  5. eval-results/humaneval/0/ckpt_006/humaneval.jsonl.tar.gz +0 -0
  6. eval-results/humaneval/0/ckpt_006/results.json.tar.gz +0 -0
  7. eval-results/humaneval/0/ckpt_006/results_true.json.tar.gz +0 -0
  8. eval-results/humaneval/0/ckpt_009/humaneval.jsonl.tar.gz +0 -0
  9. eval-results/humaneval/0/ckpt_009/results.json.tar.gz +0 -0
  10. eval-results/humaneval/0/ckpt_009/results_true.json.tar.gz +0 -0
  11. eval-results/humaneval/0/ckpt_012/humaneval.jsonl.tar.gz +0 -0
  12. eval-results/humaneval/0/ckpt_012/results.json.tar.gz +0 -0
  13. eval-results/humaneval/0/ckpt_012/results_true.json.tar.gz +0 -0
  14. eval-results/humaneval/0/ckpt_015/humaneval.jsonl.tar.gz +0 -0
  15. eval-results/humaneval/0/ckpt_015/results.json.tar.gz +0 -0
  16. eval-results/humaneval/0/ckpt_015/results_true.json.tar.gz +0 -0
  17. eval-results/humaneval/0/ckpt_018/humaneval.jsonl.tar.gz +0 -0
  18. eval-results/humaneval/0/ckpt_018/results.json.tar.gz +0 -0
  19. eval-results/humaneval/0/ckpt_018/results_true.json.tar.gz +0 -0
  20. eval-results/humaneval/0/ckpt_021/humaneval.jsonl.tar.gz +0 -0
  21. eval-results/humaneval/0/ckpt_021/results.json.tar.gz +0 -0
  22. eval-results/humaneval/0/ckpt_021/results_true.json.tar.gz +0 -0
  23. eval-results/humaneval/0/ckpt_024/humaneval.jsonl.tar.gz +0 -0
  24. eval-results/humaneval/0/ckpt_024/results.json.tar.gz +0 -0
  25. eval-results/humaneval/0/ckpt_024/results_true.json.tar.gz +0 -0
  26. eval-results/humaneval/0/ckpt_027/humaneval.jsonl.tar.gz +0 -0
  27. eval-results/humaneval/0/ckpt_027/results.json.tar.gz +0 -0
  28. eval-results/humaneval/0/ckpt_027/results_true.json.tar.gz +0 -0
  29. eval-results/humaneval/0/ckpt_030/humaneval.jsonl.tar.gz +0 -0
  30. eval-results/humaneval/0/ckpt_030/results.json.tar.gz +0 -0
  31. eval-results/humaneval/0/ckpt_030/results_true.json.tar.gz +0 -0
  32. eval-results/humaneval/0/ckpt_033/humaneval.jsonl.tar.gz +0 -0
  33. eval-results/humaneval/0/ckpt_033/results.json.tar.gz +0 -0
  34. eval-results/humaneval/0/ckpt_033/results_true.json.tar.gz +0 -0
  35. eval-results/humaneval/0/ckpt_036/humaneval.jsonl.tar.gz +0 -0
  36. eval-results/humaneval/0/ckpt_036/results.json.tar.gz +0 -0
  37. eval-results/humaneval/0/ckpt_036/results_true.json.tar.gz +0 -0
  38. eval-results/humaneval/0/ckpt_039/humaneval.jsonl.tar.gz +0 -0
  39. eval-results/humaneval/0/ckpt_039/results.json.tar.gz +0 -0
  40. eval-results/humaneval/0/ckpt_039/results_true.json.tar.gz +0 -0
  41. eval-results/humaneval/0/ckpt_042/humaneval.jsonl.tar.gz +0 -0
  42. eval-results/humaneval/0/ckpt_042/results.json.tar.gz +0 -0
  43. eval-results/humaneval/0/ckpt_042/results_true.json.tar.gz +0 -0
  44. eval-results/humaneval/0/ckpt_045/humaneval.jsonl.tar.gz +0 -0
  45. eval-results/humaneval/0/ckpt_045/results.json.tar.gz +0 -0
  46. eval-results/humaneval/0/ckpt_045/results_true.json.tar.gz +0 -0
  47. eval-results/humaneval/0/ckpt_048/humaneval.jsonl.tar.gz +0 -0
  48. eval-results/humaneval/0/ckpt_048/results.json.tar.gz +0 -0
  49. eval-results/humaneval/0/ckpt_048/results_true.json.tar.gz +0 -0
  50. eval-results/humaneval/0/ckpt_051/humaneval.jsonl.tar.gz +0 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit gallery for browsing K2 checkpoint evaluation outputs side by side.

The script reads archives laid out as
``eval-results/<metric>/<n_shot>/<checkpoint>/<file>.tar.gz`` next to this
file and renders the JSON payload of the selected archive in each of two
columns ("Checkpoint A" and "Checkpoint B").
"""
import json
import os
import tarfile

import streamlit as st

st.set_page_config(layout="wide")

PARENT_PATH: str = os.path.dirname(os.path.abspath(__file__))
EVAL_DIR: str = os.path.join(PARENT_PATH, "eval-results")


def _load_first_member(archive_path: str):
    """Return the parsed JSON content of the first regular file in a .tar.gz.

    The payload is parsed as a single JSON document first; if that fails it
    is parsed as JSON Lines (one document per non-empty line) and returned
    as a list.

    Raises:
        ValueError: if the archive holds no regular file, or the payload is
            neither valid JSON nor valid JSON Lines.
        tarfile.TarError: if the archive cannot be read.
    """
    with tarfile.open(archive_path, "r:gz") as archive:
        # Skip directory/link entries: extractfile() returns None for them.
        member = next((m for m in archive if m.isfile()), None)
        if member is None:
            raise ValueError("archive contains no regular file")
        text = archive.extractfile(member).read().decode("utf-8")

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # ``*.jsonl`` payloads hold one JSON document per line.
        return [json.loads(line) for line in text.splitlines() if line.strip()]


def _render_checkpoint(label: str, metric: str, n_shot: str) -> None:
    """Draw the checkpoint/file pickers and the selected result for one column.

    ``label`` ("A" or "B") is used both in the header and as the prefix of
    the widget keys, so the two columns keep independent widget state.
    """
    st.header(f"Checkpoint {label}")

    shot_dir = os.path.join(EVAL_DIR, metric, n_shot)
    ckpt = st.selectbox(
        "Select a checkpoint", sorted(os.listdir(shot_dir)), key=f"{label}1"
    )
    if ckpt is None:  # selectbox yields None when there are no options
        st.warning("No checkpoints found for this selection.")
        return
    st.write(f"Viewing Evaluation Results for Checkpoint: `{ckpt}`")

    ckpt_dir = os.path.join(shot_dir, ckpt)
    file = st.selectbox(
        "Select a file", sorted(os.listdir(ckpt_dir)), key=f"{label}2"
    )
    if file is None:
        st.warning("No result files found for this checkpoint.")
        return

    try:
        payload = _load_first_member(os.path.join(ckpt_dir, file))
    except (tarfile.TarError, ValueError) as err:
        # JSON and UTF-8 decode errors are ValueError subclasses.
        st.error(f"Could not load `{file}`: {err}")
        return
    st.json(payload)


st.title("K2 Evaluation Gallery")
st.markdown("""The K2 gallery allows one to browse the output of various evaluations on intermediate K2 checkpoints, which provides an intuitive understanding on how the model develops and improves over time.""")

with st.sidebar:
    # The Hub serves raw file bytes from ``/resolve/<rev>/``; ``/blob/<rev>/``
    # is the HTML viewer page and cannot be used as an <img> source.
    html = "<img src='https://www.llm360.ai/images/logo-highres.png' width='100' /><img src='https://huggingface.co/spaces/LLM360/k2-gallery/resolve/main/k2-logo.svg' width='100' />"
    st.markdown(html, unsafe_allow_html=True)

    # NOTE(review): the original indentation was not recoverable; the two
    # radios are assumed to live in the sidebar -- confirm against the repo.
    metric = st.radio(
        "Choose a metric", options=sorted(os.listdir(EVAL_DIR))
    )

    n_shot = st.radio(
        "Select an n-shot number", sorted(os.listdir(os.path.join(EVAL_DIR, metric)))
    )

col1, col2 = st.columns(2)

for column, label in ((col1, "A"), (col2, "B")):
    with column:
        _render_checkpoint(label, metric, n_shot)
eval-results/humaneval/0/ckpt_003/humaneval.jsonl.tar.gz ADDED
Binary file (102 kB). View file
 
eval-results/humaneval/0/ckpt_003/results.json.tar.gz ADDED
Binary file (2.71 kB). View file
 
eval-results/humaneval/0/ckpt_003/results_true.json.tar.gz ADDED
Binary file (739 Bytes). View file
 
eval-results/humaneval/0/ckpt_006/humaneval.jsonl.tar.gz ADDED
Binary file (93.8 kB). View file
 
eval-results/humaneval/0/ckpt_006/results.json.tar.gz ADDED
Binary file (2.71 kB). View file
 
eval-results/humaneval/0/ckpt_006/results_true.json.tar.gz ADDED
Binary file (738 Bytes). View file
 
eval-results/humaneval/0/ckpt_009/humaneval.jsonl.tar.gz ADDED
Binary file (92 kB). View file
 
eval-results/humaneval/0/ckpt_009/results.json.tar.gz ADDED
Binary file (2.72 kB). View file
 
eval-results/humaneval/0/ckpt_009/results_true.json.tar.gz ADDED
Binary file (738 Bytes). View file
 
eval-results/humaneval/0/ckpt_012/humaneval.jsonl.tar.gz ADDED
Binary file (86.6 kB). View file
 
eval-results/humaneval/0/ckpt_012/results.json.tar.gz ADDED
Binary file (2.72 kB). View file
 
eval-results/humaneval/0/ckpt_012/results_true.json.tar.gz ADDED
Binary file (738 Bytes). View file
 
eval-results/humaneval/0/ckpt_015/humaneval.jsonl.tar.gz ADDED
Binary file (91.9 kB). View file
 
eval-results/humaneval/0/ckpt_015/results.json.tar.gz ADDED
Binary file (2.71 kB). View file
 
eval-results/humaneval/0/ckpt_015/results_true.json.tar.gz ADDED
Binary file (737 Bytes). View file
 
eval-results/humaneval/0/ckpt_018/humaneval.jsonl.tar.gz ADDED
Binary file (87.1 kB). View file
 
eval-results/humaneval/0/ckpt_018/results.json.tar.gz ADDED
Binary file (2.74 kB). View file
 
eval-results/humaneval/0/ckpt_018/results_true.json.tar.gz ADDED
Binary file (741 Bytes). View file
 
eval-results/humaneval/0/ckpt_021/humaneval.jsonl.tar.gz ADDED
Binary file (85.2 kB). View file
 
eval-results/humaneval/0/ckpt_021/results.json.tar.gz ADDED
Binary file (2.72 kB). View file
 
eval-results/humaneval/0/ckpt_021/results_true.json.tar.gz ADDED
Binary file (741 Bytes). View file
 
eval-results/humaneval/0/ckpt_024/humaneval.jsonl.tar.gz ADDED
Binary file (95.5 kB). View file
 
eval-results/humaneval/0/ckpt_024/results.json.tar.gz ADDED
Binary file (2.71 kB). View file
 
eval-results/humaneval/0/ckpt_024/results_true.json.tar.gz ADDED
Binary file (738 Bytes). View file
 
eval-results/humaneval/0/ckpt_027/humaneval.jsonl.tar.gz ADDED
Binary file (87.9 kB). View file
 
eval-results/humaneval/0/ckpt_027/results.json.tar.gz ADDED
Binary file (2.71 kB). View file
 
eval-results/humaneval/0/ckpt_027/results_true.json.tar.gz ADDED
Binary file (733 Bytes). View file
 
eval-results/humaneval/0/ckpt_030/humaneval.jsonl.tar.gz ADDED
Binary file (82.1 kB). View file
 
eval-results/humaneval/0/ckpt_030/results.json.tar.gz ADDED
Binary file (2.73 kB). View file
 
eval-results/humaneval/0/ckpt_030/results_true.json.tar.gz ADDED
Binary file (740 Bytes). View file
 
eval-results/humaneval/0/ckpt_033/humaneval.jsonl.tar.gz ADDED
Binary file (84.4 kB). View file
 
eval-results/humaneval/0/ckpt_033/results.json.tar.gz ADDED
Binary file (2.72 kB). View file
 
eval-results/humaneval/0/ckpt_033/results_true.json.tar.gz ADDED
Binary file (736 Bytes). View file
 
eval-results/humaneval/0/ckpt_036/humaneval.jsonl.tar.gz ADDED
Binary file (77.1 kB). View file
 
eval-results/humaneval/0/ckpt_036/results.json.tar.gz ADDED
Binary file (2.71 kB). View file
 
eval-results/humaneval/0/ckpt_036/results_true.json.tar.gz ADDED
Binary file (741 Bytes). View file
 
eval-results/humaneval/0/ckpt_039/humaneval.jsonl.tar.gz ADDED
Binary file (88.5 kB). View file
 
eval-results/humaneval/0/ckpt_039/results.json.tar.gz ADDED
Binary file (2.71 kB). View file
 
eval-results/humaneval/0/ckpt_039/results_true.json.tar.gz ADDED
Binary file (738 Bytes). View file
 
eval-results/humaneval/0/ckpt_042/humaneval.jsonl.tar.gz ADDED
Binary file (84.6 kB). View file
 
eval-results/humaneval/0/ckpt_042/results.json.tar.gz ADDED
Binary file (2.73 kB). View file
 
eval-results/humaneval/0/ckpt_042/results_true.json.tar.gz ADDED
Binary file (737 Bytes). View file
 
eval-results/humaneval/0/ckpt_045/humaneval.jsonl.tar.gz ADDED
Binary file (94.4 kB). View file
 
eval-results/humaneval/0/ckpt_045/results.json.tar.gz ADDED
Binary file (2.71 kB). View file
 
eval-results/humaneval/0/ckpt_045/results_true.json.tar.gz ADDED
Binary file (737 Bytes). View file
 
eval-results/humaneval/0/ckpt_048/humaneval.jsonl.tar.gz ADDED
Binary file (86.5 kB). View file
 
eval-results/humaneval/0/ckpt_048/results.json.tar.gz ADDED
Binary file (2.71 kB). View file
 
eval-results/humaneval/0/ckpt_048/results_true.json.tar.gz ADDED
Binary file (734 Bytes). View file
 
eval-results/humaneval/0/ckpt_051/humaneval.jsonl.tar.gz ADDED
Binary file (85.5 kB). View file