chore: Reorder arguments in run_model_task function

Files changed:
- .gitignore (+2 -0)
- calculate_memory_usage.py (+10 -7)
- code_efficiency_calculator.py (+15 -19)
- leaderboard.py (+1 -2)
- requirements.txt (+3 -0)
.gitignore (CHANGED)

```diff
@@ -1,3 +1,5 @@
+results/
+
 # Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,macos,windows
 # Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,macos,windows
 
```
calculate_memory_usage.py (CHANGED)

```diff
@@ -1,7 +1,5 @@
-import json
 import os
 import glob
-import numpy as np
 import argparse
 from code_efficiency_calculator import run_model_task
 
@@ -52,8 +50,8 @@ def report_max_memory_usage(dat_file_path):
         max_memory_usage = max(max_memory_usage, mem_in_mb)
     return max_memory_usage
 
-def report_results(task,model):
-    run_model_task(task,model)
+def report_results(task, model, file):
+    run_model_task(task, model, file)
     dat_directory = f"./results/{task}_{model}"
     canonical_solution_directory = f"./results/{task}_canonical_solution"
     canonical_solution_memory_usage = {}
@@ -209,12 +207,17 @@ def report_results(task,model):
     total_500_nmu = total_500_nmu/len(normalized_execution_time_list)*100
     total_500_tmu = total_500_tmu/len(normalized_execution_time_list)*100
 
-    print(…)
+    return f"{model}&{total_execution_time:.2f}&{normalized_execution_time:.2f}&{max_net:.2f}&{total_500_net:.1f}&{total_max_memory_usage:.2f}&{normalized_max_memory_usage:.2f}&{max_nmu:.2f}&{total_500_nmu:.1f}&{total_memory_usage:.2f}&{normalized_memory_usage:.2f}&{max_tmu:.2f}&{total_500_tmu:.1f}&{pass1:.1f}\\\\"
+
 
 if __name__ == "__main__":
     parse = argparse.ArgumentParser()
     parse.add_argument("--task", type=str, default="EffiBench")
     parse.add_argument("--model", type=str, default="gpt-4")
-
+    parse.add_argument("--file", type=str, default="")
     args = parse.parse_args()
-    report_results(args.task,args.model)
+
+    if not args.file:
+        args.file = f"./{args.task}_{args.model}.json"
+
+    report_results(args.task,args.model, args.file)
```
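After this change, `report_results` drives the profiling run itself and returns the formatted leaderboard row instead of printing it. A minimal usage sketch, taking only the import path and the default file location from the diff above (everything else here is assumed):

```python
# Minimal sketch of the new calling convention in calculate_memory_usage.py.
from calculate_memory_usage import report_results

task, model = "EffiBench", "gpt-4"
file = f"./{task}_{model}.json"  # same default the __main__ block falls back to

# report_results now calls run_model_task(task, model, file) first, then
# aggregates the .dat profiles under ./results/{task}_{model} into one
# LaTeX-style table row, which it returns rather than prints.
row = report_results(task, model, file)
print(row)
```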
code_efficiency_calculator.py (CHANGED)

```diff
@@ -11,22 +11,12 @@ import os
 import re
 import shutil
 import contextlib
-import …
-import json
-from typing import Optional, Callable, Dict
-from concurrent.futures import ThreadPoolExecutor, as_completed
-import inspect
-import numpy as np
-import sys
+from concurrent.futures import ThreadPoolExecutor
 import concurrent.futures
-import time
 from tqdm import tqdm
 import contextlib
-import faulthandler
 import io
 import os
-import multiprocessing
-import platform
 import signal
 from tqdm import tqdm
 
@@ -618,13 +608,14 @@ def fetch_completion(dataset,model):
     return dataset
 
 
-def run_model_task(task,model):
+def run_model_task(task, model, file):
 
     if "/" in model:
        model = model.split("/")[1]
     dat_path = f"./results/{task}_{model}"
     canonical_solution_path = f"./results/{task}_canonical_solution"
-    with open(…) as f:
+
+    with open(file, "r") as f:
         dataset = json.load(f)
 
     if os.path.exists(dat_path):
@@ -639,7 +630,7 @@ def run_model_task(task,model):
 
     fetch_completion(dataset,dat_path)
 
-    with open(…) as f:
+    with open(file, "r") as f:
         dataset = json.load(f)
     for i in range(len(dataset)):
         dataset[i]["dataset"] = f"{task}"
@@ -647,9 +638,14 @@ def run_model_task(task,model):
 
 
 if __name__ == "__main__":
-    parse = argparse.ArgumentParser()
-    parse.add_argument("--task", type=str, default="EffiBench")
-    parse.add_argument("--model", type=str, default="gpt-4")
-    args = parse.parse_args()
+    parse = argparse.ArgumentParser()
+    parse.add_argument("--task", type=str, default="EffiBench")
+    parse.add_argument("--model", type=str, default="gpt-4")
+    parse.add_argument("--file", type=str, default="")
+    args = parse.parse_args()
+
+    if not args.file:
+        args.file = f"./{args.task}_{args.model}.json"
+
+    run_model_task(args.task, args.model, args.file)
 
-    run_model_task(args.model,args.task)
```
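Note that the import cleanup keeps `import concurrent.futures` alongside `from concurrent.futures import ThreadPoolExecutor`, so both spellings stay usable even though `as_completed` is no longer imported by name. The body of `fetch_completion` is not part of this diff; purely as an illustration of the pattern those two retained imports support:

```python
# Illustrative only: fetch_completion's actual body is not shown in this diff.
# This is the usual ThreadPoolExecutor pattern the retained imports allow.
import concurrent.futures
from concurrent.futures import ThreadPoolExecutor

def process_entry(entry: dict) -> dict:
    # Placeholder for per-problem work (e.g. profiling one solution).
    return entry

def run_all(dataset: list[dict]) -> list[dict]:
    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = [executor.submit(process_entry, e) for e in dataset]
        # as_completed is still reachable via the module-level import.
        return [f.result() for f in concurrent.futures.as_completed(futures)]
```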
leaderboard.py (CHANGED)

```diff
@@ -63,8 +63,7 @@ def process_uploaded_file(file):
     except Exception as e:
         return f"Error parsing the task and model name from the file name: {str(e)}! Should be in the format of <task>_<model>.json"
 
-
-
+    return report_results(task, model, file)
 
 def build_leaderboard_tab(leaderboard_table_file):
     gr.Markdown(make_default_md_1(), elem_id="leaderboard_markdown")
```
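The hunk only shows the new `return report_results(task, model, file)`; the code that produces `task` and `model` lies outside it. Based on the `<task>_<model>.json` contract in the error message, a hypothetical sketch of that parsing step (names and error text here are assumptions, not the file's actual code):

```python
import os

def parse_task_and_model(path: str) -> tuple[str, str]:
    # "<task>_<model>.json" -> ("<task>", "<model>"), splitting on the first "_"
    # so model names containing "-" (e.g. "gpt-4") survive intact.
    name = os.path.basename(path)
    stem, ext = os.path.splitext(name)
    if ext != ".json" or "_" not in stem:
        raise ValueError("Should be in the format of <task>_<model>.json")
    task, model = stem.split("_", 1)
    return task, model
```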
requirements.txt (CHANGED)

```diff
@@ -1,3 +1,6 @@
+tqdm
+numpy
+gradio
 plotly
 line_profiler
 memory_profiler
```