File size: 5,144 Bytes
2519bba
58e9235
 
9b3cb35
58e9235
2519bba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83c62cb
2519bba
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

import os

import streamlit as st, numpy as np, cv2
from PIL import Image

from data_extractor import DataExtractor
from models import FaceNetModel
from pineconevectors import Pinecone
from embeddings_generation import FaceEmbedding


# Load the dataset description and the list of image paths that will be indexed.
data_extractor = DataExtractor('./data/Json', './data/Images')
combined_data, paths = data_extractor.concat_data()

# `mtcnn` is used below for face detection; `resnet` and `transform` are
# handed to FaceEmbedding to compute the per-face embeddings.
model = FaceNetModel()
mtcnn, resnet = model.initialize_model()
transform = model.get_transform()

embeddings = FaceEmbedding(transform, resnet)

# SECURITY: the first argument looks like a Pinecone API key (TODO confirm
# against the Pinecone wrapper). Prefer supplying it via the PINECONE_API_KEY
# environment variable; the literal is kept only as a backward-compatible
# fallback and should be rotated and removed from source control.
pinecone = Pinecone(
    os.environ.get('PINECONE_API_KEY', 'c984cd49-42a6-4aa0-b2f2-e96cfb8f59bc'),
    'gcp-starter',
    'facenet',
)
pinecone_index = pinecone.initialize_index()

def process_images():
    """Detect the faces in every dataset image and upsert their embeddings.

    Iterates over the module-level ``paths``; for each image that can be
    opened, every detected face is embedded and stored through
    ``pinecone.upsert_data`` together with the image id, URLs and the face box
    normalised by the image width/height. The last argument passed to
    ``upsert_data`` is True when the image contains exactly one face.

    Images that are missing, unreadable or too large are reported and skipped;
    images without a detected face are skipped silently. A failure on one
    face is printed and does not stop the remaining faces. The number of
    image paths visited is printed at the end.
    """
    count = 0
    for index, image_path in enumerate(paths):
        try:
            # Close the file handle as soon as the pixel data has been
            # converted; `convert` returns an independent in-memory image.
            with Image.open(image_path) as raw_img:
                img = raw_img.convert("RGB")
            width, height = img.size
            boxes, _ = mtcnn.detect(img)

            image_id = combined_data['id'][index]
            img_url = combined_data['Image_URL'][index]
            page_url = combined_data['Page_URL'][index]

            # The detector may report "no face" as None instead of an empty
            # sequence (facenet-pytorch's MTCNN.detect does); len(None) would
            # raise a TypeError that none of the handlers below catch.
            if boxes is not None and len(boxes) > 0:
                single_face = len(boxes) == 1
                for box in boxes:
                    try:
                        face_embedding = embeddings.calculate_face_embedding(img, box)
                        x1, y1, x2, y2 = [int(coord) for coord in box]
                        coordinates = [x1 / width, y1 / height, x2 / width, y2 / height]
                        pinecone.upsert_data(
                            image_id, face_embedding, image_path,
                            img_url, page_url, coordinates, single_face,
                        )
                    except Exception as e:
                        # Best effort: report the failure and move on to the
                        # next face rather than aborting the whole run.
                        print(e)

        except FileNotFoundError:
            print(f"File not found: {image_path}")

        except OSError:
            print(f"Not an image file or image file is corrupted: {image_path}")

        except MemoryError:
            print(f"Out of memory when trying to open image: {image_path}")

        count += 1
    print(count)

        
def search_images(query_img):
    """Search the vector index for faces similar to the first face in *query_img*.

    Args:
        query_img: image handed to ``mtcnn.detect`` and to the embedding
            calculator (a PIL image when called from ``get_image``).

    Returns:
        Whatever ``pinecone.search_data`` returns for the embedding of the
        first detected face, or ``None`` when no face is detected.
    """
    boxes, _ = mtcnn.detect(query_img)

    # The detector may signal "no face" with None (facenet-pytorch's
    # MTCNN.detect does); indexing it would raise a TypeError.
    if boxes is None or len(boxes) == 0:
        return None

    query_embedding = embeddings.calculate_face_embedding(query_img, boxes[0])
    return pinecone.search_data(query_embedding.tolist())

def get_image():
    """Render the upload widget and search for the uploaded face.

    Returns:
        The result of ``search_images`` for the uploaded image, or ``None``
        while no file has been uploaded.
    """
    st.title("Image Upload")

    image_file = st.file_uploader("Upload Image", type=['png', 'jpeg', 'jpg', 'jfif'])
    if image_file is None:
        return None

    image = Image.open(image_file)
    st.image(image, use_column_width=True)

    # Uploaded PNGs can be RGBA, palette or grayscale; the indexed images are
    # normalised to RGB in process_images, so query with the same mode.
    return search_images(image.convert("RGB"))
    


def display_image(image) -> None:
    """Show *image* in the Streamlit page, scaled to the column width."""
    st.image(image, use_column_width=True)

def _blur_face(image, normalized_box):
    """Gaussian-blur one face region of *image* in place.

    *normalized_box* holds x1, y1, x2, y2 expressed as fractions of the image
    width/height (the form stored by ``process_images``).
    """
    width, height = image.size
    box = [float(item) for item in normalized_box]

    # Work with one set of integer pixel coordinates for both the crop and the
    # paste, clamped to the image so the region is never padded or empty.
    x1 = max(0, int(box[0] * width))
    y1 = max(0, int(box[1] * height))
    x2 = min(width, int(box[2] * width))
    y2 = min(height, int(box[3] * height))
    if x2 <= x1 or y2 <= y1:
        # Degenerate box: nothing to blur (an empty array would make
        # cv2.GaussianBlur fail).
        return

    face_region = np.array(image.crop((x1, y1, x2, y2)))
    blurred_face_region = cv2.GaussianBlur(face_region, (99, 99), 20)
    image.paste(Image.fromarray(blurred_face_region), (x1, y1))


def process_matches(matches):
    """Display every matched image, blurring the faces that did not match.

    Args:
        matches: search result with a ``'matches'`` list; each entry carries
            ``'metadata'`` (keys ``'Single Face'``, ``'Image id'``,
            ``'directory path'``) and, for multi-face images, ``'values'``.

    Single-face images are shown unchanged. For multi-face images the index
    is queried again for the faces stored under the same image id, and every
    face whose score against the matched vector is below 0.9 is blurred
    before the image is shown.
    """
    for match in matches['matches']:
        metadata = match['metadata']
        path = metadata['directory path']

        if not metadata['Single Face']:
            # NOTE(review): top_k=4 means at most four faces of the image are
            # considered; images with more faces may keep some unblurred.
            results = pinecone_index.query(
                vector=match['values'],
                top_k=4,
                include_values=False,
                include_metadata=True,
                filter={'Image id': {'$eq': metadata['Image id']}},
            )

            # Normalise to RGB (as process_images does) so the blurred patch
            # and the target image always share a mode, and release the file.
            with Image.open(path) as raw_image:
                image = raw_image.convert("RGB")

            for face in results['matches']:
                # Presumably a similarity score: faces scoring below 0.9 are
                # treated as someone other than the matched person.
                if face['score'] < 0.9:
                    _blur_face(image, face['metadata']['Face Coordinates'])
            display_image(image)
        else:
            with Image.open(path) as img:
                display_image(img)


    
if __name__ == "__main__":
    # Index the dataset first, then serve the upload/search UI.
    # NOTE(review): Streamlit re-executes the script on each interaction, so
    # the indexing step presumably runs again every time - confirm intent.
    process_images()

    matches = get_image()
    # get_image() yields None until a file has been uploaded.
    if matches is not None:
        process_matches(matches)