File size: 1,689 Bytes
d9fdfda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from transformers import pipeline
import requests as re

# Candidate categories handed to the zero-shot classifier. They are used as
# the default `labels` argument of zero_shot_classifier, which scores them
# with multi_label=True.
labels = ['toxic', 'racist', 'gender bias', 'religious bias', 'aggressive',
          'personal attacks', 'hate speech', 'offensive language']

# Zero-shot classification pipeline using the facebook/bart-large-mnli
# checkpoint. It is constructed once, at import time, and serves as the
# default `model` argument of the functions in this module.
model = pipeline("zero-shot-classification",
                 model="facebook/bart-large-mnli")


def zero_shot_classifier(text, model=model, labels=labels, *, threshold=0.75):
    """Return the first-ranked label for *text*, or ``False`` if it is weak.

    Args:
        text: The string to classify.
        model: Callable invoked as ``model(text, labels, multi_label=True)``
            that returns a mapping with parallel ``"labels"`` and
            ``"scores"`` sequences. Defaults to the module-level pipeline.
        labels: Candidate labels passed to ``model``.
        threshold: Minimum score the first-ranked label must reach to be
            reported. Defaults to 0.75, the cutoff previously hard-coded.

    Returns:
        The first label of the prediction as a ``str`` when its score is at
        least ``threshold``; otherwise ``False``.
    """
    # An empty input has nothing to classify, so skip the model call.
    if not text:
        return False

    prediction = model(text, labels, multi_label=True)
    # Index 0 is taken to be the best-scoring label; this assumes the model
    # returns its results ordered by descending score -- confirm for any
    # custom ``model`` passed in.
    if prediction["scores"][0] < threshold:
        return False
    return str(prediction["labels"][0])


# catch darkweb links
def dark_web_links(text):
    """Return ``True`` if *text* contains something shaped like a .onion link.

    A match is an alphanumeric host label followed by ``.onion``, optionally
    preceded by ``http://`` or ``https://`` and followed by path segments.
    Matching is case-insensitive.

    Args:
        text: The string to scan.

    Returns:
        ``True`` when at least one match is found, ``False`` otherwise.
    """
    # The file-level name ``re`` is bound to ``requests`` (``import requests
    # as re``), which has no regex functions, so import the stdlib module
    # locally under the same name.
    import re

    # Regular expression pattern to match common dark web link formats
    dark_web_pattern = r"(https?://)?[a-z0-9]+\.onion(/[a-zA-Z0-9]*)*"

    # Only presence matters, so a single search is enough.
    return re.search(dark_web_pattern, text, re.IGNORECASE) is not None


# catch adult content links
def adult_content_sites(text):
    """Return ``True`` if *text* mentions one of the listed adult sites.

    The listed domains are pornhub.com, xnxx.com, youporn.com and
    redtube.com, optionally preceded by a scheme and/or ``www.``.
    Matching is case-insensitive.

    Args:
        text: The string to scan.

    Returns:
        ``True`` when at least one listed domain is found, ``False``
        otherwise.
    """
    # The file-level name ``re`` is bound to ``requests`` (``import requests
    # as re``), which has no regex functions, so import the stdlib module
    # locally under the same name.
    import re

    # Regular expression pattern to match common adult content websites.
    # The former ``etc`` alternative was a placeholder that matched the
    # ordinary word "etc" and has been removed.
    adult_content_pattern = (
        r"(https?://)?(?:www\.)?"
        r"(pornhub\.com|xnxx\.com|youporn\.com|redtube\.com)\b"
    )

    # Only presence matters, so a single search is enough.
    return re.search(adult_content_pattern, text, re.IGNORECASE) is not None


def content_moderator(text, labels=labels, model=model, classifier=zero_shot_classifier, dkweb=dark_web_links, adult=adult_content_sites):
    """Run the moderation checks on *text* and report the first one that hits.

    The checks run in order: dark web link, adult content site, then the
    classifier. The first check that returns a truthy value decides the
    result.

    Args:
        text: The string to moderate.
        labels: Candidate labels forwarded to ``classifier``.
        model: Model forwarded to ``classifier``.
        classifier: Callable invoked as
            ``classifier(text, model=model, labels=labels)``; a truthy
            result flags the text.
        dkweb: Callable taking ``text``; a truthy result means a dark web
            link was found.
        adult: Callable taking ``text``; a truthy result means an adult
            content site was found.

    Returns:
        ``'Darkweb link'``, ``'Adult content site'`` or ``'Flagged content'``
        for the first check that hits, or ``None`` when no check hits.
    """
    if dkweb(text):
        return 'Darkweb link'
    if adult(text):
        return 'Adult content site'
    # Forward the caller's model and labels; previously they were accepted
    # but ignored, so the classifier always ran with its own defaults.
    if classifier(text, model=model, labels=labels):
        return 'Flagged content'
    return None