|
|
|
import re |
|
import time |
|
from typing import Dict, List |
|
|
|
import bleach |
|
import markdown2 |
|
from emoji import unicode_codes |
|
from telegram import MessageEntity |
|
from telegram.helpers import escape_markdown |
|
|
|
|
|
|
|
MATCH_MD = re.compile( |
|
r"\*(.*?)\*|" |
|
r"_(.*?)_|" |
|
r"`(.*?)`|" |
|
r"(?<!\\)(\[.*?\])(\(.*?\))|" |
|
r"(?P<esc>[*_`\[])", |
|
) |
|
|
|
LINK_REGEX = re.compile(r"(?<!\\)\[.+?\]\((.*?)\)") |
|
BTN_URL_REGEX = re.compile(r"(\[([^\[]+?)\]\(buttonurl:(?:/{0,2})(.+?)(:same)?\))") |
|
_EMOJI_REGEXP = None |
|
|
|
|
|
|
|
def get_emoji_regexp(): |
|
global _EMOJI_REGEXP |
|
if _EMOJI_REGEXP is None: |
|
emojis = sorted(unicode_codes.EMOJI_DATA, key=len, reverse=True) |
|
pattern = "(" + "|".join(re.escape(u) for u in emojis) + ")" |
|
return re.compile(pattern) |
|
|
|
|
|
def _selective_escape(to_parse: str) -> str: |
|
""" |
|
Escape all invalid markdown |
|
|
|
:param to_parse: text to escape |
|
:return: valid markdown string |
|
""" |
|
offset = 0 |
|
for match in MATCH_MD.finditer(to_parse): |
|
if match.group("esc"): |
|
ent_start = match.start() |
|
to_parse = ( |
|
to_parse[: ent_start + offset] + "\\" + to_parse[ent_start + offset :] |
|
) |
|
offset += 1 |
|
return to_parse |
|
|
|
|
|
|
|
def _calc_emoji_offset(to_calc) -> int: |
|
|
|
emoticons = get_emoji_regexp().finditer(to_calc) |
|
|
|
|
|
|
|
|
|
return sum(len(e.group(0).encode("utf-16-le")) // 2 - 1 for e in emoticons) |
|
|
|
|
|
def markdown_parser( |
|
txt: str, |
|
entities: Dict[MessageEntity, str] = None, |
|
offset: int = 0, |
|
) -> str: |
|
""" |
|
Parse a string, escaping all invalid markdown entities. |
|
|
|
Escapes URL's so as to avoid URL mangling. |
|
Re-adds any telegram code entities obtained from the entities object. |
|
|
|
:param txt: text to parse |
|
:param entities: dict of message entities in text |
|
:param offset: message offset - command and notename length |
|
:return: valid markdown string |
|
""" |
|
if not entities: |
|
entities = {} |
|
if not txt: |
|
return "" |
|
|
|
prev = 0 |
|
res = "" |
|
|
|
|
|
|
|
for ent, ent_text in entities.items(): |
|
if ent.offset < -offset: |
|
continue |
|
|
|
start = ent.offset + offset |
|
end = ent.offset + offset + ent.length - 1 |
|
|
|
|
|
if ent.type in ("code", "url", "text_link", "spoiler"): |
|
|
|
count = _calc_emoji_offset(txt[:start]) |
|
start -= count |
|
end -= count |
|
|
|
|
|
if ent.type == "url": |
|
if any( |
|
match.start(1) <= start and end <= match.end(1) |
|
for match in LINK_REGEX.finditer(txt) |
|
): |
|
continue |
|
|
|
else: |
|
|
|
res += _selective_escape(txt[prev:start] or "") + escape_markdown( |
|
ent_text, 2 |
|
) |
|
|
|
|
|
elif ent.type == "code": |
|
res += _selective_escape(txt[prev:start]) + "`" + ent_text + "`" |
|
|
|
|
|
elif ent.type == "text_link": |
|
res += _selective_escape(txt[prev:start]) + "[{}]({})".format( |
|
ent_text, |
|
ent.url, |
|
) |
|
|
|
elif ent.type == "spoiler": |
|
res += _selective_escape(txt[prev:start]) + "||" + ent_text + "||" |
|
|
|
end += 1 |
|
|
|
|
|
else: |
|
continue |
|
|
|
prev = end |
|
|
|
res += _selective_escape(txt[prev:]) |
|
return res |
|
|
|
|
|
def button_markdown_parser( |
|
txt: str, |
|
entities: Dict[MessageEntity, str] = None, |
|
offset: int = 0, |
|
) -> (str, List): |
|
markdown_note = markdown_parser(txt, entities, offset) |
|
prev = 0 |
|
note_data = "" |
|
buttons = [] |
|
for match in BTN_URL_REGEX.finditer(markdown_note): |
|
|
|
n_escapes = 0 |
|
to_check = match.start(1) - 1 |
|
while to_check > 0 and markdown_note[to_check] == "\\": |
|
n_escapes += 1 |
|
to_check -= 1 |
|
|
|
|
|
if n_escapes % 2 == 0: |
|
|
|
buttons.append((match.group(2), match.group(3), bool(match.group(4)))) |
|
note_data += markdown_note[prev : match.start(1)] |
|
prev = match.end(1) |
|
|
|
else: |
|
note_data += markdown_note[prev:to_check] |
|
prev = match.start(1) - 1 |
|
else: |
|
note_data += markdown_note[prev:] |
|
|
|
return note_data, buttons |
|
|
|
|
|
def escape_invalid_curly_brackets(text: str, valids: List[str]) -> str: |
|
new_text = "" |
|
idx = 0 |
|
while idx < len(text): |
|
if text[idx] == "{": |
|
if idx + 1 < len(text) and text[idx + 1] == "{": |
|
idx += 2 |
|
new_text += "{{{{" |
|
continue |
|
else: |
|
success = False |
|
for v in valids: |
|
if text[idx:].startswith("{" + v + "}"): |
|
success = True |
|
break |
|
if success: |
|
new_text += text[idx : idx + len(v) + 2] |
|
idx += len(v) + 2 |
|
continue |
|
else: |
|
new_text += "{{" |
|
|
|
elif text[idx] == "}": |
|
if idx + 1 < len(text) and text[idx + 1] == "}": |
|
idx += 2 |
|
new_text += "}}}}" |
|
continue |
|
else: |
|
new_text += "}}" |
|
|
|
else: |
|
new_text += text[idx] |
|
idx += 1 |
|
|
|
return new_text |
|
|
|
|
|
SMART_OPEN = "β" |
|
SMART_CLOSE = "β" |
|
START_CHAR = ("'", '"', SMART_OPEN) |
|
|
|
|
|
def split_quotes(text: str) -> List: |
|
if not any(text.startswith(char) for char in START_CHAR): |
|
return text.split(None, 1) |
|
counter = 1 |
|
while counter < len(text): |
|
if text[counter] == "\\": |
|
counter += 1 |
|
elif text[counter] == text[0] or ( |
|
text[0] == SMART_OPEN and text[counter] == SMART_CLOSE |
|
): |
|
break |
|
counter += 1 |
|
else: |
|
return text.split(None, 1) |
|
|
|
|
|
key = remove_escapes(text[1:counter].strip()) |
|
|
|
rest = text[counter + 1 :].strip() |
|
if not key: |
|
key = text[0] + text[0] |
|
return list(filter(None, [key, rest])) |
|
|
|
|
|
def remove_escapes(text: str) -> str: |
|
res = "" |
|
is_escaped = False |
|
for counter in range(len(text)): |
|
if is_escaped: |
|
res += text[counter] |
|
is_escaped = False |
|
elif text[counter] == "\\": |
|
is_escaped = True |
|
else: |
|
res += text[counter] |
|
return res |
|
|
|
|
|
def escape_chars(text: str, to_escape: List[str]) -> str: |
|
to_escape.append("\\") |
|
new_text = "" |
|
for x in text: |
|
if x in to_escape: |
|
new_text += "\\" |
|
new_text += x |
|
return new_text |
|
|
|
|
|
async def extract_time(message, time_val): |
|
if any(time_val.endswith(unit) for unit in ("m", "h", "d")): |
|
unit = time_val[-1] |
|
time_num = time_val[:-1] |
|
if not time_num.isdigit(): |
|
await message.reply_text("Invalid time amount specified.") |
|
return "" |
|
|
|
if unit == "m": |
|
bantime = int(time.time() + int(time_num) * 60) |
|
elif unit == "h": |
|
bantime = int(time.time() + int(time_num) * 60 * 60) |
|
elif unit == "d": |
|
bantime = int(time.time() + int(time_num) * 24 * 60 * 60) |
|
else: |
|
|
|
return "" |
|
return bantime |
|
else: |
|
await message.reply_text( |
|
"Invalid time type specified. Expected m,h, or d, got: {}".format( |
|
time_val[-1], |
|
), |
|
) |
|
return "" |
|
|
|
|
|
def markdown_to_html(text: str): |
|
text = text.replace("*", "**") |
|
text = text.replace("`", "```") |
|
text = text.replace("~", "~~") |
|
|
|
spoiler_pattern = re.compile(r"\|\|(?=\S)(.+?)(?<=\S)\|\|", re.S) |
|
text = spoiler_pattern.sub(r"<tg-spoiler>\1</tg-spoiler>", text) |
|
|
|
_html = markdown2.markdown(text, extras=["strike", "underline"]) |
|
return bleach.clean( |
|
_html, |
|
tags=["strong", "em", "a", "code", "pre", "strike", "u", "tg-spoiler"], |
|
strip=True, |
|
)[:-1] |
|
|
|
|
|
|
|
|