import csv
import os
import shutil
import json
import yaml
from typing import Any, List, Tuple, Union
from copy import deepcopy
from .log import logger
from .others import get_cur_time_str
from .file import ensure_dir
class CSVDataRecord:
    """Collect data into a CSV file.

    Automatically backs up an existing file with the same name to avoid DATA LOSS:
    ```
    # data loss: all content in ./a-file-contains-important-data.csv will be
    # flushed and unrecoverable if it's opened with 'w':
    with open('./a-file-contains-important-data.csv', 'w') as f:
        # do sth.
    ```
    Consider this scenario (actually it was my sad experience):
    - The code above sits at the top of your experimental script,
    - and you've finished the experiment and collected the data into the CSV file.
    - If you then accidentally run the script again, all the valuable data is lost!

    :class:`CSVDataRecord` makes sure this never happens again.
    """
    def __init__(self, file_path: str, header: List[str], backup=True):
        """Open the file and write the CSV header into it.

        Args:
            file_path (str): Target CSV file path.
            header (List[str]): CSV header, like `['name', 'age', 'sex', ...]`.
            backup (bool, optional): If True, an existing file at :attr:`file_path` will be \
                backed up to `file_path + '.' + <current timestamp>`. Defaults to True.
        """
        self.file_path = file_path
        self.header = header

        if backup and os.path.exists(file_path):
            backup_file_path = '{}.{}'.format(file_path, get_cur_time_str())
            shutil.copyfile(file_path, backup_file_path)
            logger.warn('csv file already exists! backup raw file to {}'.format(backup_file_path))

        ensure_dir(file_path)
        with open(file_path, 'w') as f:
            writer = csv.writer(f)
            writer.writerow(header)
    def write(self, data: Union[List[Any], Tuple[Any]]):
        """Write a row of data to the file at :attr:`file_path`.

        Args:
            data (Union[List[Any], Tuple[Any]]): A row of data, like `('ekko', 18, 'man')`.
        """
        assert len(data) == len(self.header)
        with open(self.file_path, 'a') as f:
            writer = csv.writer(f)
            writer.writerow(data)
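
# A minimal usage sketch of CSVDataRecord (the file path and rows are
# hypothetical):
#
#     record = CSVDataRecord('./results.csv', ['name', 'age', 'sex'])
#     record.write(('ekko', 18, 'man'))
#     record.write(('amy', 21, 'woman'))
#
# If './results.csv' already existed, it is first copied to
# './results.csv.<timestamp>', so re-running the script cannot destroy
# previously collected data.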
def write_json(file_path: str, obj: Any, indent=2, backup=True, ensure_obj_serializable=False):
    """Collect data into a JSON file.

    Automatically backs up an existing file with the same name to avoid DATA LOSS (refer to :class:`CSVDataRecord`).

    Args:
        file_path (str): Target JSON file path.
        obj (Any): Collected data which can be serialized into JSON format.
        indent (int, optional): Keep indentation to ensure readability. Defaults to 2.
        backup (bool, optional): If True, an existing file at :attr:`file_path` will be \
            backed up to `file_path + '.' + <current timestamp>`. Defaults to True.
        ensure_obj_serializable (bool, optional): If True, deep-copy :attr:`obj` and convert its \
            non-serializable values (callables, tensors) to strings via :func:`make_obj_json_serializable` \
            before dumping. Defaults to False.
    """
    if backup and os.path.exists(file_path):
        backup_file_path = '{}.{}'.format(file_path, get_cur_time_str())
        shutil.copyfile(file_path, backup_file_path)
        logger.warn('json file already exists! backup raw file to {}'.format(backup_file_path))

    ensure_dir(file_path)
    if ensure_obj_serializable:
        obj = deepcopy(obj)
        make_obj_json_serializable(obj)

    with open(file_path, 'w', encoding='utf8') as f:
        obj_str = json.dumps(obj, indent=indent, ensure_ascii=False)
        f.write(obj_str)
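
# A usage sketch for write_json (path and data are hypothetical). With
# ensure_obj_serializable=True, a deep copy of the dict is sanitized first:
# callables are stored as their source code and tensors as strings (see
# make_obj_json_serializable below), so the caller's dict is left untouched.
#
#     def lr_schedule(epoch):
#         return 0.01 * (0.9 ** epoch)
#
#     write_json('./config.json', {'lr': 0.01, 'schedule': lr_schedule},
#                ensure_obj_serializable=True)
#     config = read_json('./config.json')  # config['schedule'] is source text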
def read_json(file_path: str):
    """Read a JSON file.

    Args:
        file_path (str): Target JSON file path.

    Returns:
        Any: The object parsed from the target file.
    """
    with open(file_path, 'r', encoding='utf8') as f:
        return json.loads(f.read())

def read_yaml(file_path: str):
    """Read a YAML file.

    Args:
        file_path (str): Target YAML file path.

    Returns:
        Any: The object parsed from the target file.
    """
    with open(file_path, 'r') as f:
        return yaml.load(f, yaml.Loader)
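
# Note: yaml.Loader can construct arbitrary Python objects from tagged YAML
# documents, so read_yaml should only be used on trusted files; yaml.safe_load
# is the safer choice for untrusted input. A usage sketch (hypothetical file):
#
#     cfg = read_yaml('./config.yaml')  # e.g. {'model': 'resnet18', 'lr': 0.01}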

# These imports are only used by make_obj_json_serializable below.
import inspect
import torch
def make_obj_json_serializable(obj):
    """Recursively convert the values of a (possibly nested) dict in place so it
    can be dumped as JSON: callables are replaced by their source code and
    torch tensors by their string representation."""
    for k, v in obj.items():
        if isinstance(v, dict):
            obj[k] = make_obj_json_serializable(v)
        elif callable(v):
            obj[k] = inspect.getsource(v)
        elif isinstance(v, torch.Tensor):
            obj[k] = str(v)
    return obj
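
# A minimal sketch of the effect of make_obj_json_serializable, assuming torch
# is available (the dict below is hypothetical):
#
#     def decay(epoch):
#         return 0.9 ** epoch
#
#     d = {'schedule': decay, 'meta': {'weights': torch.zeros(2)}}
#     make_obj_json_serializable(d)
#     # d['schedule'] == "def decay(epoch):\n    return 0.9 ** epoch\n"
#     # d['meta']['weights'] == 'tensor([0., 0.])'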