gyigit committed on
Commit
e9f3238
·
2 Parent(s): 8babe71 1cc2077

Merge branch 'main' of https://huggingface.co/spaces/mgyigit/probe3

Browse files
Files changed (8) hide show
  1. .gitattributes +35 -0
  2. Makefile +13 -0
  3. README.md +45 -0
  4. app.py +129 -0
  5. index.html +19 -0
  6. pyproject.toml +13 -0
  7. requirements.txt +19 -0
  8. style.css +28 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Makefile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Formatting / lint helpers.
# Both targets are commands rather than files, so both must be declared phony;
# otherwise a file or directory named `quality` would stop that target running.
.PHONY: style quality


# Reformat the code base in place and apply ruff's automatic fixes.
style:
	python -m black --line-length 119 .
	python -m isort .
	ruff check --fix .


# Run the same tools in check mode (black --check, isort --check-only, ruff without --fix).
quality:
	python -m black --check --line-length 119 .
	python -m isort --check-only .
	ruff check .
README.md ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: PROBE
3
+ emoji: 🥇
4
+ colorFrom: green
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ app_file: app.py
8
+ pinned: false
9
+ license: gpl
10
+ python_version: 3.8.1
11
+ ---
12
+
13
+ # Start the configuration
14
+
15
+ Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
16
+
17
+ Results files should have the following format and be stored as json files:
18
+ ```json
19
+ {
20
+ "config": {
21
+ "model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
22
+ "model_name": "path of the model on the hub: org/model",
23
+ "model_sha": "revision on the hub",
24
+ },
25
+ "results": {
26
+ "task_name": {
27
+ "metric_name": score,
28
+ },
29
+ "task_name2": {
30
+ "metric_name": score,
31
+ }
32
+ }
33
+ }
34
+ ```
35
+
36
+ Request files are created automatically by this tool.
37
+
38
+ If you encounter a problem on the Space, don't hesitate to restart it to remove the created eval-queue, eval-queue-bk, eval-results and eval-results-bk folders.
39
+
40
+ # Code logic for more complex edits
41
+
42
+ You'll find
43
+ - the main table's column names and properties in `src/display/utils.py`
44
+ - the logic to read all results and request files, then convert them into dataframe rows, in `src/leaderboard/read_evals.py` and `src/populate.py`
45
+ - the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+ import re
6
+ import pandas as pd
7
+ import os
8
+ import json
9
+
10
+ from src.about import *
11
+
12
+ global data_component, filter_component
13
+
14
+
15
def get_baseline_df():
    """Load the results table restricted to the currently selected columns.

    Reads the CSV at ``CSV_RESULT_PATH`` and keeps the "Method" column followed
    by the columns listed in ``checkbox_group.value``.

    Returns:
        pandas.DataFrame: the filtered results table.
    """
    selected_columns = ["Method"] + checkbox_group.value
    results = pd.read_csv(CSV_RESULT_PATH)
    return results[selected_columns]
20
+
21
def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type: str,
):
    """Store the uploaded representation files for a submitted model.

    Args:
        human_file: Uploaded CSV for the Human dataset, or ``None`` when nothing
            was uploaded. Raw ``bytes`` (what a ``type='binary'`` upload
            delivers) are accepted, as is anything ``pandas.read_csv`` takes
            directly (a path or a file-like object).
        skempi_file: Uploaded CSV for the SKEMPI dataset; same conventions as
            ``human_file``.
        model_name_textbox: Model name entered by the user.
        revision_name_textbox: Optional revision name; when non-empty it is used
            instead of the model name to build the output file names.
        benchmark_type: Benchmark selection from the form (not used here).

    Returns:
        None.
    """
    import io  # local import: only needed to wrap raw upload bytes

    representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
    print(representation_name)

    # The name comes straight from a text box and ends up in a file path, so
    # collapse anything that is not a plain file-name character (path
    # separators in particular) to keep the output inside the target folder.
    safe_name = re.sub(r"[^A-Za-z0-9._-]+", "_", representation_name)

    output_dir = os.path.join(".", "src", "data", "representation_vectors")

    # Save human and skempi files under ./src/data/representation_vectors using pandas
    for uploaded, suffix in ((human_file, "human"), (skempi_file, "skempi")):
        if uploaded is None:
            continue
        # pandas.read_csv expects a path or a file-like object, so raw bytes
        # are wrapped in an in-memory buffer before parsing.
        source = io.BytesIO(uploaded) if isinstance(uploaded, (bytes, bytearray)) else uploaded
        frame = pd.read_csv(source)
        os.makedirs(output_dir, exist_ok=True)
        frame.to_csv(os.path.join(output_dir, f"{safe_name}_{suffix}.csv"), index=False)

    return None
36
+
37
+ block = gr.Blocks()
38
+
39
+ with block:
40
+ gr.Markdown(
41
+ LEADERBOARD_INTRODUCTION
42
+ )
43
+ with gr.Tabs(elem_classes="tab-buttons") as tabs:
44
+ # PROBE benchmark results table
45
+ with gr.TabItem("🏅 PROBE Benchmark", elem_id="probe-benchmark-tab-table", id=1):
46
+ # selection for column part:
47
+ checkbox_group = gr.CheckboxGroup(
48
+ choices=TASK_INFO,
49
+ label="Benchmark Type",
50
+ interactive=True,
51
+ ) # user can select the evaluation dimension
52
+
53
+ baseline_value = get_baseline_df()
54
+ baseline_header = ["Method"] + checkbox_group.value
55
+ baseline_datatype = ['markdown'] + ['number'] * len(checkbox_group.value)
56
+
57
+ data_component = gr.components.Dataframe(
58
+ value=baseline_value,
59
+ headers=baseline_header,
60
+ type="pandas",
61
+ datatype=baseline_datatype,
62
+ interactive=False,
63
+ visible=True,
64
+ )
65
+
66
+ # About tab
67
+ with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
68
+ with gr.Row():
69
+ gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
70
+
71
+ with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
72
+ with gr.Row():
73
+ gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
74
+
75
+ with gr.Row():
76
+ gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")
77
+
78
+ with gr.Row():
79
+ with gr.Column():
80
+ model_name_textbox = gr.Textbox(
81
+ label="Model name",
82
+ )
83
+ revision_name_textbox = gr.Textbox(
84
+ label="Revision Model Name",
85
+ )
86
+ # Selection of benchmark types (similarity, family, function, affinity) used to evaluate the representations (checkbox)
87
+ benchmark_type = gr.CheckboxGroup(
88
+ choices=TASK_INFO,
89
+ label="Benchmark Type",
90
+ interactive=True,
91
+ )
92
+
93
+ with gr.Column():
94
+ human_file = gr.components.File(label="Click to Upload the representation file (csv) for Human dataset", file_count="single", type='binary')
95
+ skempi_file = gr.components.File(label="Click to Upload the representation file (csv) for SKEMPI dataset", file_count="single", type='binary')
96
+
97
+ submit_button = gr.Button("Submit Eval")
98
+ submission_result = gr.Markdown()
99
+ submit_button.click(
100
+ add_new_eval,
101
+ inputs = [
102
+ human_file,
103
+ skempi_file,
104
+ model_name_textbox,
105
+ revision_name_textbox,
106
+ benchmark_type
107
+ ],
108
+ )
109
+
110
def refresh_data():
    """Return a freshly loaded results table by calling ``get_baseline_df``."""
    return get_baseline_df()
114
+
115
+ with gr.Row():
116
+ data_run = gr.Button("Refresh")
117
+ data_run.click(
118
+ refresh_data, outputs=[data_component]
119
+ )
120
+
121
+ with gr.Accordion("Citation", open=False):
122
+ citation_button = gr.Textbox(
123
+ value=CITATION_BUTTON_TEXT,
124
+ label=CITATION_BUTTON_LABEL,
125
+ elem_id="citation-button",
126
+ show_copy_button=True,
127
+ )
128
+
129
+ block.launch()
index.html ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width" />
6
+ <title>My static Space</title>
7
+ <link rel="stylesheet" href="style.css" />
8
+ </head>
9
+ <body>
10
+ <div class="card">
11
+ <h1>Welcome to your static Space!</h1>
12
+ <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
+ <p>
14
+ Also don't forget to check the
15
+ <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
+ </p>
17
+ </div>
18
+ </body>
19
+ </html>
pyproject.toml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.ruff]
2
+ # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
3
+ select = ["E", "F"]
4
+ ignore = ["E501"] # line too long (black is taking care of this)
5
+ line-length = 119
6
+ fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
7
+
8
+ [tool.isort]
9
+ profile = "black"
10
+ line_length = 119
11
+
12
+ [tool.black]
13
+ line-length = 119
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ APScheduler
2
+ black
3
+ datasets
4
+ gradio
5
+ gradio[oauth]
6
+ gradio_leaderboard==0.0.9
7
+ gradio_client
8
+ huggingface-hub>=0.18.0
9
+ python-dateutil
10
+ tqdm
11
+ transformers
12
+ tokenizers>=0.15.0
13
+ sentencepiece
14
+ matplotlib
15
+ numpy
16
+ pandas==1.1.4
17
+ pyyaml==5.1
18
+ scikit-learn==0.22
19
+ scikit-multilearn==0.2.0
style.css ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ padding: 2rem;
3
+ font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
+ }
5
+
6
+ h1 {
7
+ font-size: 16px;
8
+ margin-top: 0;
9
+ }
10
+
11
+ p {
12
+ color: rgb(107, 114, 128);
13
+ font-size: 15px;
14
+ margin-bottom: 10px;
15
+ margin-top: 5px;
16
+ }
17
+
18
+ .card {
19
+ max-width: 620px;
20
+ margin: 0 auto;
21
+ padding: 16px;
22
+ border: 1px solid lightgray;
23
+ border-radius: 16px;
24
+ }
25
+
26
+ .card p:last-child {
27
+ margin-bottom: 0;
28
+ }