Spaces:
Running
Running
- helper_regex.py +20 -6
helper_regex.py
CHANGED
@@ -9,12 +9,26 @@
|
|
9 |
|
10 |
import re
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
def is_blocked_markdown_code(message):
|
20 |
pattern = r"(?:python|py|javascript|js|bash|sh|html|go|cpp|c|json)\n.*?"
|
|
|
9 |
|
10 |
import re
|
11 |
|
12 |
+
# Plain words that are skipped outright; matched against each
# whitespace-separated word after lowercasing.
WHITELIST_WORDS = {"eval", "admin", "bot", "python", "ok", "done", "anti"}

# Compiled once at import time instead of on every call.
# The class covers the main emoji blocks plus the invisible code points that
# accompany emoji (zero-width joiner, variation selector-16); without those,
# sequences such as U+2764 U+FE0F would leave a non-ASCII remainder behind and
# be misreported as stylised text.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # misc symbols and pictographs (incl. skin tones)
    "\U0001F680-\U0001F6FF"  # transport and map symbols
    "\U0001F1E6-\U0001F1FF"  # regional indicators (flags)
    "\U0001F900-\U0001F9FF"  # supplemental symbols and pictographs
    "\U0001FA70-\U0001FAFF"  # symbols and pictographs extended-A
    "\u2600-\u26FF\u2700-\u27BF"  # misc symbols, dingbats
    "\u200D\uFE0F"  # zero-width joiner, variation selector-16
    "]+",
    flags=re.UNICODE,
)


def contains_stylish_with_whitelist(text: str) -> bool:
    """Return True if *text* contains non-ASCII characters other than emoji.

    Emoji are stripped first, then the remainder is split on whitespace.
    Words whose lowercase form is in ``WHITELIST_WORDS`` are skipped; any
    other word containing a non-ASCII character makes the result True.

    Args:
        text: The message text to inspect.

    Returns:
        True when at least one non-whitelisted word contains a non-ASCII
        character after emoji removal, otherwise False (including for
        empty or whitespace-only input).
    """
    # NOTE(review): every whitelist entry is ASCII, so an exact match rarely
    # changes the outcome; confirm whether stylised spellings of these words
    # were meant to be normalised before the lookup.
    for word in _EMOJI_PATTERN.sub("", text).split():
        if word.lower() in WHITELIST_WORDS:
            continue
        # str.isascii() is False as soon as any code point is above 127.
        if not word.isascii():
            return True
    return False
|
32 |
|
33 |
def is_blocked_markdown_code(message):
|
34 |
pattern = r"(?:python|py|javascript|js|bash|sh|html|go|cpp|c|json)\n.*?"
|