hanhainebula committed
Commit 48e891a · 1 Parent(s): c1c3d39

upload code for showing submit infos table

Files changed (4):
  1. app.py +28 -51
  2. src/backend.py +60 -6
  3. src/css_html_js.py +105 -0
  4. src/envs.py +11 -3
app.py CHANGED
@@ -1,52 +1,60 @@
 import os
+import json
 import logging
+import pandas as pd
 import gradio as gr
 import multiprocessing
 
 from src.backend import pull_search_results
 from src.envs import (
     API, REPO_ID, START_COMMIT_ID,
-    LOG_DIR, LOG_FILE_PATH, HF_CACHE_DIR,
+    HF_CACHE_DIR, SUBMIT_INFOS_SAVE_PATH,
     HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR,
     UNZIP_TARGET_DIR,
     TIME_DURATION,
     EVAL_K_VALUES,
 )
+from src.css_html_js import custom_css
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(
-    filename=LOG_FILE_PATH,
-    filemode='w',
     level=logging.WARNING,
-    datefmt='%Y-%m-%d %H:%M:%S',
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 
 
-def restart_space():
-    API.restart_space(repo_id=REPO_ID)
+# def restart_space():
+#     API.restart_space(repo_id=REPO_ID)
 
 
-def get_log_files():
-    if not os.path.exists(LOG_DIR):
-        return []
-    return sorted([f for f in os.listdir(LOG_DIR) if f.endswith('.log')])
+def load_submit_infos_df():
+    if os.path.exists(SUBMIT_INFOS_SAVE_PATH):
+        with open(SUBMIT_INFOS_SAVE_PATH, 'r', encoding='utf-8') as f:
+            submit_infos = json.load(f)
+    else:
+        submit_infos = []
+    submit_infos_df = pd.DataFrame(submit_infos)
+    return submit_infos_df
 
 
-def refresh_log_files():
-    return get_log_files()
-
+with gr.Blocks(css=custom_css) as demo:
+    gr.Markdown("## Submission Infos Table")
+
+    table = gr.Dataframe(
+        value=load_submit_infos_df(),
+        label="Submission Infos",
+        interactive=False,
+    )
+
+    refresh_button = gr.Button("Refresh Submission Infos")
 
-def display_log_content(selected_file):
-    if selected_file:
-        with open(os.path.join(LOG_DIR, selected_file), 'r', encoding='utf-8') as file:
-            return file.read()
-    return "No log file selected"
+    refresh_button.click(
+        fn=load_submit_infos_df,
+        outputs=table,
+    )
 
 
 if __name__ == "__main__":
-    os.makedirs(LOG_DIR, exist_ok=True)
-
     process = multiprocessing.Process(
         target=pull_search_results,
         args=(
@@ -60,35 +68,4 @@ if __name__ == "__main__":
         ),
     )
     process.start()
-
-    with gr.Blocks() as demo:
-        gr.Markdown("## Select a log file to view its content")
-
-        log_file_dropdown = gr.Dropdown(
-            choices=refresh_log_files(),
-            label="Select log file",
-            interactive=True,
-        )
-        log_content_box = gr.Textbox(
-            label="Log content",
-            lines=20,
-            interactive=False,
-        )
-        log_file_list_box = gr.Textbox(
-            label="\n".join(get_log_files()),
-            lines=20,
-            interactive=False,
-        )
-        refresh_button = gr.Button("Refresh log files")
-
-        log_file_dropdown.change(
-            fn=display_log_content,
-            inputs=log_file_dropdown,
-            outputs=log_content_box,
-        )
-        refresh_button.click(
-            fn=refresh_log_files,
-            outputs=log_file_dropdown,
-        )
-
     demo.launch()
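
The new app.py only renders whatever the backend has written to SUBMIT_INFOS_SAVE_PATH. As a rough sketch of that data flow (not part of the commit; the row values below are invented, only the column names mirror get_submit_infos_list in src/backend.py):

    import json
    import pandas as pd

    # Invented example rows with the columns the backend writes to submit_infos.json.
    example_rows = [
        {
            "Rank": 1,
            "Submission Date": "2024-08-01 12:00:00",
            "Benchmark Version": "24.05",
            "Retrieval Method": '<a target="_blank" href="https://example.org/retriever">example-retriever</a>',
            "Reranking Method": "example-reranker",
            "Revision": "v1",
            "Status": "✔️ Success",
        },
    ]
    with open("submit_infos.json", "w", encoding="utf-8") as f:
        json.dump(example_rows, f, ensure_ascii=False, indent=4)

    # load_submit_infos_df() reads this kind of file and hands the DataFrame to gr.Dataframe;
    # clicking "Refresh Submission Infos" simply re-runs it and replaces the table value.
    print(pd.DataFrame(example_rows))
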
src/backend.py CHANGED
@@ -13,16 +13,14 @@ from air_benchmark.evaluation_utils.evaluator import Evaluator
 
 from src.envs import (
     API,
-    LOG_FILE_PATH, ZIP_CACHE_DIR,
-    SEARCH_RESULTS_REPO, RESULTS_REPO
+    ZIP_CACHE_DIR, SUBMIT_INFOS_SAVE_PATH,
+    SEARCH_RESULTS_REPO, RESULTS_REPO,
+    make_clickable_model
 )
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(
-    filename=LOG_FILE_PATH,
-    filemode='w',
     level=logging.WARNING,
-    datefmt='%Y-%m-%d %H:%M:%S',
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 
@@ -111,6 +109,49 @@ def get_zip_file_path(zip_file_name: str):
     return zip_file_path
 
 
+def find_file(file_name: str, dir_path: str):
+    if not os.path.exists(dir_path) or not os.path.isdir(dir_path):
+        return False
+    for root, _, files in os.walk(dir_path):
+        for file in files:
+            if file == file_name:
+                return True
+    return False
+
+
+def get_submit_infos_list(file_paths: List[str], eval_results_dir: str) -> dict:
+    submit_infos_list = []
+    for file_path in file_paths:
+        submit_info = {
+            'Rank': None,
+            'Submission Date': None,
+            'Benchmark Version': None,
+            'Retrieval Method': None,
+            'Reranking Method': None,
+            'Revision': None,
+            'Status': None,
+        }
+        file_name = os.path.basename(file_path).split('.')[0]
+        rank_time = file_name.split('-')[0]
+        with open(file_path, 'r', encoding='utf-8') as f:
+            metadata = json.load(f)
+        submit_info['Rank'] = rank_time
+        submit_info['Submission Date'] = metadata['timestamp']
+        submit_info['Benchmark Version'] = metadata['version']
+        submit_info['Retrieval Method'] = make_clickable_model(metadata['model_name'], metadata['model_url'])
+        submit_info['Reranking Method'] = make_clickable_model(metadata['reranker_name'], metadata['reranker_url'])
+        submit_info['Revision'] = metadata['revision']
+        if find_file(f"results_{file_name}.json", eval_results_dir):
+            submit_info['Status'] = "✔️ Success"
+        else:
+            submit_info['Status'] = "❌ Failed"
+        submit_infos_list.append(submit_info)
+    sorted_submit_infos_list = sorted(submit_infos_list, key=lambda x: x['Rank'], reverse=True)
+    for rank, submit_info in enumerate(sorted_submit_infos_list, 1):
+        submit_info['Rank'] = rank
+    return sorted_submit_infos_list
+
+
 def pull_search_results(
     hf_search_results_repo_dir: str,
     hf_eval_results_repo_dir: str,
@@ -132,6 +173,13 @@ def pull_search_results(
         )
         cur_file_paths = get_file_list(hf_search_results_repo_dir, allowed_suffixes=['.json'])
     else:
+        API.snapshot_download(
+            repo_id=SEARCH_RESULTS_REPO,
+            repo_type="dataset",
+            local_dir=hf_search_results_repo_dir,
+            etag_timeout=30,
+            allow_patterns=['*.json']
+        )
         cur_file_paths = get_file_list(hf_search_results_repo_dir, allowed_suffixes=['.json'])
 
     print("Start to pull new search results ...")
@@ -280,7 +328,13 @@ def pull_search_results(
         shutil.rmtree(ZIP_CACHE_DIR)
         shutil.rmtree(unzip_target_dir)
 
+        # update submit infos
+        cur_file_paths = new_file_paths
+        submit_infos_list = get_submit_infos_list(cur_file_paths, hf_eval_results_repo_dir)
+        with open(SUBMIT_INFOS_SAVE_PATH, 'w', encoding='utf-8') as f:
+            json.dump(submit_infos_list, f, ensure_ascii=False, indent=4)
+
         # Wait for the next update
         logger.warning(f"Wait for {time_duration} seconds for the next update ...")
-        cur_file_paths = new_file_paths
+
         time.sleep(time_duration)
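
Two details of get_submit_infos_list are easy to miss: the Rank column is first filled with the timestamp-like prefix of the search-result file name (everything before the first '-'), and only after sorting newest-first is it overwritten with 1..N. A standalone sketch of just that step (the file-name prefixes are invented; the sort/renumber lines mirror the committed code):

    # Invented prefixes standing in for "<rank_time>-....json" search-result file names.
    submit_infos_list = [
        {"Rank": "20240801120000", "Retrieval Method": "model-a"},
        {"Rank": "20240902080000", "Retrieval Method": "model-b"},
    ]

    # Same sorting and renumbering as in get_submit_infos_list.
    sorted_submit_infos_list = sorted(submit_infos_list, key=lambda x: x["Rank"], reverse=True)
    for rank, submit_info in enumerate(sorted_submit_infos_list, 1):
        submit_info["Rank"] = rank

    print(sorted_submit_infos_list)
    # [{'Rank': 1, 'Retrieval Method': 'model-b'}, {'Rank': 2, 'Retrieval Method': 'model-a'}]
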
src/css_html_js.py ADDED
@@ -0,0 +1,105 @@
+custom_css = """
+
+.markdown-text {
+    font-size: 16px !important;
+}
+
+#models-to-add-text {
+    font-size: 18px !important;
+}
+
+#citation-button span {
+    font-size: 16px !important;
+}
+
+#citation-button textarea {
+    font-size: 16px !important;
+}
+
+#citation-button > label > button {
+    margin: 6px;
+    transform: scale(1.3);
+}
+
+#leaderboard-table {
+    margin-top: 15px
+}
+
+#leaderboard-table-lite {
+    margin-top: 15px
+}
+
+#search-bar-table-box > div:first-child {
+    background: none;
+    border: none;
+}
+
+#search-bar {
+    padding: 0px;
+}
+
+/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
+table td:first-child,
+table th:first-child {
+    max-width: 400px;
+    overflow: auto;
+    white-space: nowrap;
+}
+
+.tab-buttons button {
+    font-size: 20px;
+}
+
+#scale-logo {
+    border-style: none !important;
+    box-shadow: none;
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+    max-width: 600px;
+}
+
+#scale-logo .download {
+    display: none;
+}
+#filter_type{
+    border: 0;
+    padding-left: 0;
+    padding-top: 0;
+}
+#filter_type label {
+    display: flex;
+}
+#filter_type label > span{
+    margin-top: var(--spacing-lg);
+    margin-right: 0.5em;
+}
+#filter_type label > .wrap{
+    width: 103px;
+}
+#filter_type label > .wrap .wrap-inner{
+    padding: 2px;
+}
+#filter_type label > .wrap .wrap-inner input{
+    width: 1px
+}
+#filter-columns-type{
+    border: 0;
+    padding: 0.5;
+}
+#filter-columns-size{
+    border: 0;
+    padding: 0.5;
+}
+#box-filter > .form{
+    border: 0
+}
+"""
+
+get_window_url_params = """
+function(url_params) {
+    const params = new URLSearchParams(window.location.search);
+    url_params = Object.fromEntries(params);
+    return url_params;
+}
+"""
src/envs.py CHANGED
@@ -1,5 +1,4 @@
 import os
-import time
 from huggingface_hub import HfApi
 
 
@@ -22,8 +21,7 @@ CACHE_PATH = os.getenv("HF_HOME", ".")
 HF_CACHE_DIR = os.path.join(CACHE_PATH, ".cache")
 ZIP_CACHE_DIR = os.path.join(CACHE_PATH, ".zip_cache")
 
-LOG_DIR = os.path.join(CACHE_PATH, "logs")
-LOG_FILE_PATH = os.path.join(LOG_DIR, f"backend_{time.strftime('%Y-%m-%d_%H-%M-%S')}.log")
+SUBMIT_INFOS_SAVE_PATH = os.path.join(CACHE_PATH, "submit_infos.json")
 
 API = HfApi(token=HF_TOKEN)
 
@@ -35,3 +33,13 @@ UNZIP_TARGET_DIR = os.path.join(CACHE_PATH, "unzip_target_dir")
 TIME_DURATION = 300 # seconds
 
 EVAL_K_VALUES = [1, 3, 5, 10, 50, 100, 1000]
+
+def model_hyperlink(link, model_name):
+    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+
+
+def make_clickable_model(model_name: str, model_link: str):
+    # link = f"https://huggingface.co/{model_name}"
+    if not model_link or not model_link.startswith("https://"):
+        return model_name
+    return model_hyperlink(model_link, model_name)
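
make_clickable_model falls back to the plain model name whenever the submitted link is missing or is not an https URL, so the table never renders a broken anchor. A quick usage sketch (values invented):

    from src.envs import make_clickable_model

    # A well-formed https link is wrapped in an <a> tag by model_hyperlink.
    print(make_clickable_model("example-retriever", "https://example.org/retriever"))

    # Anything else (empty string, http://, plain text) is returned unchanged.
    print(make_clickable_model("example-reranker", ""))
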