Spaces:
Configuration error
Configuration error
import json | |
import re | |
from typing import Any, Callable | |
# dict2json -------------------------------- | |
def dict2json(python_dict: dict[str, Any]) -> str:
    """Serialize a Python dict to a JSON string.

    Args:
        python_dict: The dict to serialize.

    Returns:
        The JSON text, pretty-printed with a two-space indent.
    """
    # ensure_ascii=False keeps non-ASCII text (e.g. Japanese) readable in the
    # output instead of turning it into escape sequences.
    return json.dumps(python_dict, ensure_ascii=False, indent=2)
# optimize token -------------------------------- | |
def optimize_token(text: str, funcs: list[Callable[[str], str]] | None = None) -> str: | |
""" | |
ใใญในใใฎใใผใฏใณใๆ้ฉๅใใใ | |
Args: | |
text (str): ๆ้ฉๅใใใใญในใ | |
Returns: | |
str: ๆ้ฉๅใใใใใญในใ | |
""" | |
funcs = funcs or [minimize_newline, zenkaku_to_hankaku, remove_trailing_spaces] | |
for func in funcs: | |
text = func(text) | |
return text.strip() | |
def _replace_consecutive(text: str, pattern: str, replacing_text: str) -> str: | |
""" | |
ใใญในใๅ ใฎ้ฃ็ถใใใใฟใผใณใซๅฏพใใฆใๆๅฎใใใ็ฝฎๆใใญในใใง็ฝฎๆใใ | |
Args: | |
text (str): ใใญในใ | |
pattern (str): ็ฝฎๆใใใใฟใผใณ | |
replacing_text (str): ็ฝฎๆใใญในใ | |
Returns: | |
str: ็ฝฎๆใใใใใญในใ | |
""" | |
p = re.compile(pattern) | |
matches = [(m.start(), m.end()) for m in p.finditer(text)][::-1] | |
text_replaced = list(text) | |
for i_start, i_end in matches: | |
text_replaced[i_start:i_end] = [replacing_text] | |
return "".join(text_replaced) | |
def minimize_newline(text: str) -> str:
    """Collapse each run of two or more newlines into exactly two.

    Args:
        text: The text to process.

    Returns:
        The text with every run of consecutive newlines capped at two.
    """
    newline_run = "\n{2,}"
    blank_line = "\n\n"
    return _replace_consecutive(text, newline_run, blank_line)
def zenkaku_to_hankaku(text: str) -> str:
    """Convert full-width characters in the text to half-width equivalents.

    Characters listed in the override table are replaced by their mapped
    string. Any other character in U+FF01..U+FF5E (the full-width forms of
    ASCII "!" through "~") is shifted down by 0xFEE0 to its ASCII
    counterpart. Everything else is kept unchanged.

    Args:
        text: The text to convert.

    Returns:
        The converted text.
    """
    # Explicit overrides, checked before the generic range shift below.
    # NOTE(review): the keys and values of this literal appear mis-encoded in
    # this copy of the file (several keys are identical and longer than one
    # character, so they can never equal a single char). They are reproduced
    # unchanged here -- confirm them against the original source.
    mapping_dict = {"ใ": " ", "๏ผ": ": ", "โ": " ", "๏ผ": "ใ", "๏ผ": "ใ", "๏ฟฅ": "ยฅ"}

    fullwidth_first = 0xFF01  # 65281, full-width "!"
    fullwidth_last = 0xFF5E  # 65374, full-width "~"
    ascii_offset = 0xFEE0  # 65248, distance from a full-width form to ASCII

    # Collect pieces and join once instead of growing a string with +=.
    converted = []
    for char in text:
        if char in mapping_dict:
            converted.append(mapping_dict[char])
        elif fullwidth_first <= ord(char) <= fullwidth_last:
            converted.append(chr(ord(char) - ascii_offset))
        else:
            converted.append(char)
    return "".join(converted)
def remove_trailing_spaces(text: str) -> str:
    """Strip trailing whitespace from every line of the text.

    Args:
        text: The text to process. Lines are delimited by the newline
            character only.

    Returns:
        The text with trailing whitespace removed from each line and the
        lines re-joined with single newlines.
    """
    lines = text.split("\n")
    return "\n".join(map(str.rstrip, lines))