# Run the app with:
#   python -m streamlit run d:/NSFW/Project/test1.py
import math

import keras_ocr
import streamlit as st
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Initialize the keras-ocr pipeline (downloads pretrained detector and
# recognizer weights on first use)
pipeline = keras_ocr.pipeline.Pipeline()

# Tokenizer plus the fine-tuned BERT classifier; "CustomModel" is the local
# directory holding the fine-tuned checkpoint
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model_2 = BertForSequenceClassification.from_pretrained("CustomModel")
model_2.to('cpu')
model_2.eval()  # inference mode: disables dropout
def get_distance(predictions):
    """
    Return a list of dictionaries, one per detection, with keys:
        * text : detected text in image
        * center_x : center of bounding box (x)
        * center_y : center of bounding box (y)
        * distance_from_origin : distance from the origin (0, 0)
        * distance_y : distance between center_y and the origin
    """

    # Point of origin
    x0, y0 = 0, 0

    # Generate dictionary
    detections = []
    for group in predictions:

        # Get center point of bounding box; keras-ocr returns box corners in
        # the order top-left, top-right, bottom-right, bottom-left
        top_left_x, top_left_y = group[1][0]
        bottom_right_x, bottom_right_y = group[1][2]
        center_x, center_y = (top_left_x + bottom_right_x)/2, (top_left_y + bottom_right_y)/2

        # Euclidean distance from the origin to the box center
        distance_from_origin = math.dist([x0, y0], [center_x, center_y])

        # Vertical offset from the origin, used later to group rows
        distance_y = center_y - y0

        # Append all results
        detections.append({
            'text': group[0],
            'center_x': center_x,
            'center_y': center_y,
            'distance_from_origin': distance_from_origin,
            'distance_y': distance_y,
        })

    return detections
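
# Example (hypothetical numbers, not from a real detection): a keras-ocr
# prediction such as
#     ('hello', array([[10., 20.], [90., 20.], [90., 40.], [10., 40.]]))
# has its box center at (50.0, 30.0), so get_distance would return
#     {'text': 'hello', 'center_x': 50.0, 'center_y': 30.0,
#      'distance_from_origin': 58.31, 'distance_y': 30.0}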

def distinguish_rows(lst, thresh=10):
    """Group detections into rows of text.

    Assumes lst is sorted by 'distance_y'; consecutive detections whose
    vertical centers differ by at most thresh pixels share a row.
    """
    if not lst:
        return
    sublists = [lst[0]]
    for i in range(len(lst) - 1):
        if lst[i+1]['distance_y'] - lst[i]['distance_y'] <= thresh:
            sublists.append(lst[i+1])
        else:
            yield sublists
            sublists = [lst[i+1]]
    yield sublists
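
# Example (hypothetical values): with thresh=10, detections whose
# 'distance_y' values are [10, 12, 40] (sorted top-to-bottom) yield two
# rows: the first two detections together and the third on its own.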

# Title of the app
st.title("NSFW Content Detector")

# File uploader widget
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    st.image(uploaded_file, caption='Uploaded Image', width=200)

    # Rewind the upload buffer (st.image may have consumed it), then read
    uploaded_file.seek(0)
    read_image = keras_ocr.tools.read(uploaded_file)

    # prediction_groups is a list (one entry per input image) of
    # (word, box) tuples
    prediction_groups = pipeline.recognize([read_image])
    predictions = prediction_groups[0]  # detections for the single image
    predictions = get_distance(predictions)

    # Sort top-to-bottom so row grouping sees detections in order;
    # set thresh higher for text spaced further apart
    predictions.sort(key=lambda d: d['distance_y'])
    predictions = list(distinguish_rows(predictions, thresh=10))

    # Drop any empty rows (defensive; grouping should not produce them)
    predictions = list(filter(lambda x: x != [], predictions))

    # Order detections into reading order: left-to-right within each row
    ordered_preds = []
    for row in predictions:
        row = sorted(row, key=lambda x: x['distance_from_origin'])
        for each in row:
            ordered_preds.append(each['text'])

    # Join detections into a single sentence
    sentence = ' '.join(ordered_preds)

    # Classify the assembled text with the fine-tuned BERT model
    inputs = tokenizer(sentence, padding=True, truncation=True,
                       return_tensors='pt').to('cpu')
    with torch.no_grad():
        outputs = model_2(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predictions = predictions.cpu().numpy()

    # Label convention from fine-tuning: index 0 = safe, index 1 = not safe
    if predictions[0][0] > predictions[0][1]:
        st.write('Safe for Work')
    else:
        st.write('Not Safe for Work')
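
# Example (hypothetical output): for benign text the softmax row might look
# like [0.93, 0.07], so index 0 wins and "Safe for Work" is shown; this
# assumes the custom model was trained with label 0 = safe, 1 = not safe.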