File size: 3,903 Bytes
77ba698 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"import orjson\n",
"\n",
"\n",
"def normalize_file_name(file_name):\n",
"    \"\"\"Return ``file_name`` with every '/' replaced by '_'.\"\"\"\n",
"    return '_'.join(file_name.split('/'))\n",
"\n",
"\n",
"def keep_key(key):\n",
"    \"\"\"Tell whether ``key`` passes the filter.\n",
"\n",
"    A key is rejected when it ends with 'acc' or contains 'sciq' or 'siqa'.\n",
"    \"\"\"\n",
"    excluded_fragments = (\"sciq\", \"siqa\")\n",
"    if key.endswith(\"acc\"):\n",
"        return False\n",
"    return not any(fragment in key for fragment in excluded_fragments)\n",
"\n",
"\n",
"def get_slider_max(data):\n",
"    \"\"\"Pick the slider upper bound from the first metric of the first entry.\n",
"\n",
"    Returns 10 when that metric's 'x' sequence has fewer than 20 elements,\n",
"    otherwise 30.\n",
"    \"\"\"\n",
"    first_entry = list(data.values())[0]\n",
"    first_metric = list(first_entry.values())[0]\n",
"    sample_count = len(first_metric[\"x\"])\n",
"    return 30 if sample_count >= 20 else 10\n",
"\n",
"\n",
"def create_index(data, traces, layout, default_window_size, default_metric):\n",
"    \"\"\"Build the per-task file payloads, the file index and the settings.\n",
"\n",
"    Entries are taken from ``data`` when it is non-empty, otherwise from\n",
"    ``traces``. Each payload stores its entry under the key 'data' or\n",
"    'traces' respectively, next to the shared ``layout``.\n",
"\n",
"    Returns a ``(files_data, index_files, settings)`` tuple. ``settings``\n",
"    carries slider bounds and the default metric only when ``data`` is\n",
"    non-empty; otherwise its slider is None.\n",
"    \"\"\"\n",
"    print(default_metric or \"None\")\n",
"\n",
"    # Decide once which mapping feeds the index and under which key.\n",
"    if data:\n",
"        payload_key, source = \"data\", data\n",
"    else:\n",
"        payload_key, source = \"traces\", traces\n",
"\n",
"    files_data = {\n",
"        task_id: {payload_key: task_data, \"layout\": layout}\n",
"        for task_id, task_data in source.items()\n",
"    }\n",
"    index_files = {\n",
"        task_id: {\"file\": f\"{normalize_file_name(task_id)}.json\"}\n",
"        for task_id in source\n",
"    }\n",
"\n",
"    if data:\n",
"        slider = {\n",
"            \"min\": 0,\n",
"            \"max\": get_slider_max(data),\n",
"            \"default\": default_window_size,\n",
"        }\n",
"        settings = {\"slider\": slider, \"defaultMetric\": default_metric}\n",
"    else:\n",
"        settings = {\"slider\": None}\n",
"\n",
"    return files_data, index_files, settings\n",
"\n",
"\n",
" \n",
" \n",
"\n",
"# Not referenced by the rest of this cell; kept so the notebook namespace is unchanged.\n",
"new_data = {}\n",
"\n",
"# Split every ./data/plots/<name>.json into a ./data/plots/<name>/ directory\n",
"# holding an index.json plus one JSON file per task.\n",
"for file_name in os.listdir('./data/plots'):\n",
"    if not file_name.endswith('.json'):\n",
"        continue\n",
"\n",
"    # Read raw bytes: orjson parses bytes directly, so the result does not\n",
"    # depend on the platform's default text encoding.\n",
"    with open(f'./data/plots/{file_name}', 'rb') as file:\n",
"        old_data = orjson.loads(file.read())\n",
"\n",
"    data = {\n",
"        key: value\n",
"        for key, value in old_data.get(\"data\", {}).items()\n",
"        if keep_key(key)\n",
"    }\n",
"    traces = dict(old_data.get(\"traces\", {}))\n",
"    default_window_size = old_data.get(\"defaultWindowSize\")\n",
"    default_metric = old_data.get(\"defaultMetric\")\n",
"    files_data, index_files, settings = create_index(\n",
"        data, traces, old_data[\"layout\"], default_window_size, default_metric\n",
"    )\n",
"\n",
"    # Drop only the trailing '.json'. Splitting on the first '.' would\n",
"    # truncate names such as 'a.b.json' to 'a' and make distinct inputs\n",
"    # share one output directory.\n",
"    dir_name = os.path.splitext(file_name)[0]\n",
"    os.makedirs(f'./data/plots/{dir_name}', exist_ok=True)\n",
"    with open(f'./data/plots/{dir_name}/index.json', 'wb') as file:\n",
"        file.write(orjson.dumps({\n",
"            \"files\": index_files,\n",
"            \"settings\": settings,\n",
"        }))\n",
"\n",
"    # Distinct loop variable so the filtered ``data`` above is not shadowed.\n",
"    for metric_name, file_payload in files_data.items():\n",
"        with open(f'./data/plots/{dir_name}/{normalize_file_name(metric_name)}.json', 'wb') as file:\n",
"            file.write(orjson.dumps(file_payload))\n",
"\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "datatrove3.10",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|