Spaces:
Configuration error
Configuration error
File size: 2,922 Bytes
88435ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import json
import re
from typing import Any, Callable
# dict2json --------------------------------
def dict2json(python_dict: dict[str, Any]) -> str:
    """Serialize a Python dict to a pretty-printed JSON string.

    Args:
        python_dict: Dictionary to serialize.

    Returns:
        The JSON text, indented by two spaces, with non-ASCII characters
        written as-is instead of being escaped.
    """
    # ensure_ascii=False keeps non-ASCII text (e.g. Japanese) readable
    # in the output.
    return json.dumps(python_dict, ensure_ascii=False, indent=2)
# optimize token --------------------------------
def optimize_token(text: str, funcs: list[Callable[[str], str]] | None = None) -> str:
"""
ใใญในใใฎใใผใฏใณใๆ้ฉๅใใใ
Args:
text (str): ๆ้ฉๅใใใใญในใ
Returns:
str: ๆ้ฉๅใใใใใญในใ
"""
funcs = funcs or [minimize_newline, zenkaku_to_hankaku, remove_trailing_spaces]
for func in funcs:
text = func(text)
return text.strip()
def _replace_consecutive(text: str, pattern: str, replacing_text: str) -> str:
"""
ใใญในใๅ
ใฎ้ฃ็ถใใใใฟใผใณใซๅฏพใใฆใๆๅฎใใใ็ฝฎๆใใญในใใง็ฝฎๆใใ
Args:
text (str): ใใญในใ
pattern (str): ็ฝฎๆใใใใฟใผใณ
replacing_text (str): ็ฝฎๆใใญในใ
Returns:
str: ็ฝฎๆใใใใใญในใ
"""
p = re.compile(pattern)
matches = [(m.start(), m.end()) for m in p.finditer(text)][::-1]
text_replaced = list(text)
for i_start, i_end in matches:
text_replaced[i_start:i_end] = [replacing_text]
return "".join(text_replaced)
def minimize_newline(text: str) -> str:
    """Collapse runs of consecutive newlines so that at most two remain.

    Args:
        text: Text to process.

    Returns:
        The text in which every run of two or more newlines has been
        replaced by exactly two newlines.
    """
    collapsed = _replace_consecutive(text, "\n{2,}", "\n\n")
    return collapsed
def zenkaku_to_hankaku(text: str) -> str:
    """Convert full-width (zenkaku) characters to half-width (hankaku) ones.

    Characters listed in the special-case table are replaced first; any
    other character in the full-width ASCII range U+FF01-U+FF5E is shifted
    to its ASCII counterpart; everything else is left untouched.

    Args:
        text: Text to process.

    Returns:
        The converted text.
    """
    # Special cases, checked before the generic range shift below.
    # NOTE(review): the literals in this table were mis-encoded in the
    # reviewed source (duplicate and multi-character keys that could never
    # match a single character). They are rebuilt here as explicit escapes
    # from the surviving bytes and the paired values - please confirm,
    # in particular the period/comma pairing.
    mapping_dict = {
        "\u3000": " ",  # ideographic space -> ASCII space
        "\uff1a": ": ",  # full-width colon -> colon followed by a space
        # NOTE(review): this key could not be recovered (it was probably a
        # character in U+2000-U+201F mapped to a space); kept unchanged.
        "โ": " ",
        "\uff0e": "\u3002",  # full-width full stop -> ideographic full stop
        "\uff0c": "\u3001",  # full-width comma -> ideographic comma
        "\uffe5": "\u00a5",  # full-width yen sign -> yen sign
    }
    converted = []
    for char in text:
        if char in mapping_dict:
            converted.append(mapping_dict[char])
        elif 0xFF01 <= ord(char) <= 0xFF5E:
            # Full-width letters, digits and punctuation mirror ASCII
            # 0x21-0x7E at a fixed offset of 0xFEE0.
            converted.append(chr(ord(char) - 0xFEE0))
        else:
            converted.append(char)
    # Join once instead of growing a string with += inside the loop.
    return "".join(converted)
def remove_trailing_spaces(text: str) -> str:
    """Strip trailing whitespace from every line of the text.

    Args:
        text: Text to process.

    Returns:
        The text with whitespace removed from the end of each
        newline-separated line.
    """
    stripped_lines = map(str.rstrip, text.split("\n"))
    return "\n".join(stripped_lines)
|