import pandas as pd
from src.display.utils import BENCHMARK_COLS
from src.about import Tasks
from src.leaderboard.read_evals import get_raw_eval_results
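
# List each configured task with its benchmark, metric, and display column name.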
print("Tasks definitions:")
for task in Tasks:
print(f"- {task.name}: benchmark={task.value.benchmark}, metric={task.value.metric}, col_name={task.value.col_name}")
print("\nBenchmark columns:", BENCHMARK_COLS)
try:
    # Get raw results first
    raw_results = get_raw_eval_results("eval-results", "eval-queue")
    print("\nRaw results:")
    for result in raw_results:
        print("\nResult:")
        print("- eval_name:", result.eval_name)
        print("- results:", result.results)
        data_dict = result.to_dict()
        print("- data_dict:", data_dict)

    # Convert to DataFrame
    all_data_json = [v.to_dict() for v in raw_results]
    df = pd.DataFrame.from_records(all_data_json)
    print("\nDataFrame columns:", df.columns.tolist())
    print("\nDataFrame contents:")
    print(df)
except Exception as e:
print("\nError:", str(e))
import traceback
traceback.print_exc()
# Print raw data for debugging
print("\nRaw data from results file:")
import json
with open("eval-results/results_1.json") as f:
print(json.dumps(json.load(f), indent=2))