Kpenciler's picture
Upload 53 files
88435ed verified
import json
import re
from typing import Any, Callable
# dict2json --------------------------------
def dict2json(python_dict: dict[str, Any]) -> str:
    """
    Serialize a Python dict to a pretty-printed JSON string.

    Args:
        python_dict (dict): The dict to serialize.

    Returns:
        str: The JSON text, indented by two spaces.
    """
    # ensure_ascii=False emits non-ASCII text (e.g. Japanese) as-is
    # instead of \uXXXX escape sequences.
    return json.dumps(python_dict, ensure_ascii=False, indent=2)
# optimize token --------------------------------
def optimize_token(text: str, funcs: list[Callable[[str], str]] | None = None) -> str:
"""
ใƒ†ใ‚ญใ‚นใƒˆใฎใƒˆใƒผใ‚ฏใƒณใ‚’ๆœ€้ฉๅŒ–ใ‚’ใ™ใ‚‹
Args:
text (str): ๆœ€้ฉๅŒ–ใ™ใ‚‹ใƒ†ใ‚ญใ‚นใƒˆ
Returns:
str: ๆœ€้ฉๅŒ–ใ•ใ‚ŒใŸใƒ†ใ‚ญใ‚นใƒˆ
"""
funcs = funcs or [minimize_newline, zenkaku_to_hankaku, remove_trailing_spaces]
for func in funcs:
text = func(text)
return text.strip()
def _replace_consecutive(text: str, pattern: str, replacing_text: str) -> str:
"""
ใƒ†ใ‚ญใ‚นใƒˆๅ†…ใฎ้€ฃ็ถšใ™ใ‚‹ใƒ‘ใ‚ฟใƒผใƒณใซๅฏพใ—ใฆใ€ๆŒ‡ๅฎšใ•ใ‚ŒใŸ็ฝฎๆ›ใƒ†ใ‚ญใ‚นใƒˆใง็ฝฎๆ›ใ™ใ‚‹
Args:
text (str): ใƒ†ใ‚ญใ‚นใƒˆ
pattern (str): ็ฝฎๆ›ใ™ใ‚‹ใƒ‘ใ‚ฟใƒผใƒณ
replacing_text (str): ็ฝฎๆ›ใƒ†ใ‚ญใ‚นใƒˆ
Returns:
str: ็ฝฎๆ›ใ•ใ‚ŒใŸใƒ†ใ‚ญใ‚นใƒˆ
"""
p = re.compile(pattern)
matches = [(m.start(), m.end()) for m in p.finditer(text)][::-1]
text_replaced = list(text)
for i_start, i_end in matches:
text_replaced[i_start:i_end] = [replacing_text]
return "".join(text_replaced)
def minimize_newline(text: str) -> str:
    """
    Collapse every run of two or more consecutive newlines into exactly two.

    Args:
        text (str): The text to process.

    Returns:
        str: The text with at most two newlines in a row.
    """
    newline_run = "\n{2,}"
    blank_line = "\n\n"
    return _replace_consecutive(text, newline_run, blank_line)
def zenkaku_to_hankaku(text: str) -> str:
    """
    Convert full-width (zenkaku) characters in the text to half-width (hankaku).

    Full-width ASCII variants U+FF01..U+FF5E
    (A-Za-z0-9!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~) are shifted down by 0xFEE0 to
    their ASCII counterparts. A handful of characters have explicit overrides
    that take precedence over that generic shift.

    Args:
        text (str): The text to convert.

    Returns:
        str: The text with full-width characters replaced.
    """
    # Keys are written as escapes so the table survives any re-encoding of
    # this source file; each key must be exactly one code point to match.
    overrides = {
        "\u3000": " ",  # IDEOGRAPHIC SPACE -> ASCII space
        "\uff1a": ": ",  # FULLWIDTH COLON -> colon followed by a space
        "\u200e": " ",  # LEFT-TO-RIGHT MARK -> ASCII space
        "\uff0e": "\u3002",  # FULLWIDTH FULL STOP -> IDEOGRAPHIC FULL STOP
        "\uff0c": "\u3001",  # FULLWIDTH COMMA -> IDEOGRAPHIC COMMA
        "\uffe5": "\u00a5",  # FULLWIDTH YEN SIGN -> YEN SIGN
    }

    # Generic shift for the full-width ASCII range (65281..65374, offset 65248).
    table = {code: chr(code - 0xFEE0) for code in range(0xFF01, 0xFF5E + 1)}
    # Overrides are applied last so they win over the generic shift.
    table.update({ord(char): replacement for char, replacement in overrides.items()})

    # Single pass over the text instead of quadratic string concatenation.
    return text.translate(table)
def remove_trailing_spaces(text: str) -> str:
    """
    Strip trailing whitespace from the end of every line in the text.

    Args:
        text (str): The text to process.

    Returns:
        str: The text with each "\\n"-separated line right-stripped.
    """
    lines = text.split("\n")
    return "\n".join(map(str.rstrip, lines))