Spaces:
Runtime error
Runtime error
File size: 8,469 Bytes
a57c6eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 |
# -*- coding: UTF-8 -*-
"""
__author__ = zhiqiangxia
__date__ = 2019-03-18
"""
import os
import time
import json
import importlib.util
from typing import Any, Tuple, Dict, List, Iterable
from collections import Counter
import pandas as pd
import yaml
import pandas as pd
def dict2list(dct: dict) -> list:
"""将字典转换为列表,若值为列表,使用extend而不是append
Args:
dct (dict):
Returns:
list:
"""
lst = []
for k, v in dct.items():
if isinstance(v, list):
lst.extend(v)
else:
lst.append(v)
return lst
class Timer(object):
"""A simple timer."""
def __init__(self):
self.total_time = 0.0
self.calls = 0
self.start_time = 0.0
self.diff = 0.0
self.average_time = 0.0
def tic(self) -> float:
# using time.time instead of time.clock because time time.clock
# does not normalize for multithreading
self.start_time = time.time()
def toc(self, average: bool = True) -> float:
self.diff = time.time() - self.start_time
self.total_time += self.diff
self.calls += 1
self.average_time = self.total_time / self.calls
if average:
return self.average_time
else:
return self.diff
def load_dct_from_file(path: str, key=None) -> dict:
"""读取字典类型的文件
Args:
path (str): 字典文件路径
Raises:
ValueError: 不支持该字典文件类型,仅支持json、yaml、python中的字典key
Returns:
dict: 读取的字典
"""
if path.endswith(".json"):
dct = load_json(path)
elif path.endswith(".yaml"):
dct = load_yaml(path)
elif path.endswith(".py"):
dct = load_edct_py(path, key)
else:
raise ValueError("unsupported config file")
return dct
def load_json(path: str) -> dict:
"""读取json文件
Args:
path (str): json路径
Returns:
dict: 读取后的python 字典
"""
with open(path, "r", encoding="utf-8") as f:
dct = json.load(f)
return dct
def load_yaml(path: str) -> dict:
"""读取yaml文件
Args:
path (str): yaml路径
Returns:
dict: 读取后的python 字典
"""
dct = yaml.load(path)
return dct
def load_edct_py(path: str, obj_name: str = None) -> dict:
"""读取Python中的字典
Args:
path (str): py文件路径
obj_name (str): py文件中的字典变量名
Returns:
dict: 读取后的字典
"""
module_name = "module_name"
spec = importlib.util.spec_from_file_location(module_name, path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
dct = module if obj_name is None else getattr(module, obj_name)
return dct
def merge_dct(target_dct: dict, source_dct: dict = None) -> None:
"""
merge source_dct into target_dct
"""
if source_dct is not None:
for k, v in source_dct.items():
if k not in target_dct:
target_dct[k] = v
else:
if not isinstance(v, dict):
target_dct[k] = v
else:
merge_dct(target_dct[k], source_dct[k])
def convert_class_attr_to_dict(
obj: object, target_keys: list = None, ignored_keys: list = None
) -> dict:
"""将类中的属性转化成字典,默认转化为所有属性。
Args:
obj (object): 类对象
target_keys (list, optional): 需要保存的属性. Defaults to None.
ignored_keys (list, optional): 需要忽视的属性. Defaults to None.
Returns:
dict: 转换后的字典
"""
if target_keys is not None:
dct = {k: v for k, v in obj.__dict__.items() if k in target_keys}
return dct
if ignored_keys is None:
ignored_keys = []
dct = {k: v for k, v in obj.__dict__.items() if k not in ignored_keys}
return dct
def merge_list_continuous_same_element(lst: List[Any]) -> List[Dict[str, Any]]:
"""将一层列表的相邻值合并,并返回每一个不同值的stat、end、元素值
Args:
lst (List[Any]): _description_
Returns:
List[Dict[str, Any]]: 合并后的列表结果,形如
[
{
"star": x,
"end": x,
"element": x,
},
]
"""
merge_lst = []
if len(lst) == 0:
return lst
elif len(lst) == 1:
return {"start": 0, "end": 0, "element": lst[0]}
start = 0
end = 0
last_element = lst[end]
for i, element in enumerate(lst):
if i == 0:
continue
if i == len(lst) - 1:
if element != last_element:
dct = {"start": start, "end": end, "element": last_element}
merge_lst.append(dct)
last = {"start": len(lst) - 1, "end": i, "element": element}
merge_lst.append(last)
else:
last = {"start": start, "end": i, "element": element}
merge_lst.append(last)
break
if element != last_element:
dct = {"start": start, "end": end, "element": last_element}
merge_lst.append(dct)
start = i
last_element = element
end = i
return merge_lst
def flatten2generator(lst: Iterable, ignored_iterable_types: List = None):
"""将一个嵌套迭代器展开成生成器,
Args:
lst (Iterable): 待展开的迭代器
ignored_iterable_types (_type_, List): 如果待展开的迭代器在该目标列表中,则不展开. Defaults to None.
Yields:
_type_: 不是迭代器的类型,或者 ignored_iterable_types中的类型
"""
if ignored_iterable_types is None:
ignored_iterable_types = []
for element in lst:
if (
isinstance(element, Iterable)
and type(element) not in ignored_iterable_types
):
for subc in flatten2generator(element):
yield subc
else:
yield element
def flatten(lst: List, ignored_iterable_types=None) -> List:
"""将 flatten_nested_iterable_2_generator展开的生成器转化为迭代器,容器目前使用 list
Args:
lst (List): _description_
ignored_iterable_types (_type_, List): 如果待展开的迭代器在该目标列表中,则不展开. Defaults to None.
Returns:
List: _description_
"""
return list(flatten2generator(lst, ignored_iterable_types=ignored_iterable_types))
def get_current_strtime(fmt: str = "%Y-%m-%d %H:%M:%S") -> str:
current_time = time.strftime(fmt, time.localtime())
return current_time
def advanced_count(df: Iterable) -> Dict:
"""对迭代器中的数值内容进行统计
Args:
df (Iterable): 值为可统计的迭代器,如str, int, float等
Returns:
Dict: 统计结果
"""
n_all = len(df)
count = Counter(df)
new_count = {"total": n_all}
for k, v in count.items():
new_count[k] = v
new_count["{}_ratio".format(k)] = round(v / n_all * 100, 2)
return new_count
class CustomCounter(object):
def __init__(self, name: str) -> None:
"""多类别统计器,支持输入值的类别,针对每种类别分别统计
Args:
name (str): _description_
"""
self.name = name
self._category_col = "category"
self._value_col = "value"
self._df = pd.DataFrame(columns=[self._category_col, self._value_col])
def update(self, v, k: str = "default") -> None:
new = pd.DataFrame([{self._category_col: k, self._value_col: v}])
self._df = pd.concat([self._df, new], axis=0)
def advanced_count(
self,
) -> Dict:
dct = {"total": self.simple_count()}
if len(self._df[self._category_col] != "default") > 0:
for k, k_df in self._df.groupby(self._category_col):
dct[k] = advanced_count(k_df[self._value_col])
return dct
def simple_count(
self,
) -> Dict:
return advanced_count(self._df[self._value_col])
def count(self, is_simple: bool = False) -> Dict:
if is_simple:
return self.simple_count()
else:
return self.advanced_count()
|