"""Face indexing and search demo.

Extracts images and metadata, detects faces with MTCNN, embeds them with
FaceNet, stores the embeddings in Pinecone, and serves a Streamlit UI
that finds matching images and blurs non-matching faces in group photos.
"""
import os

import cv2
import numpy as np
import streamlit as st
from PIL import Image

from data_extractor import DataExtractor
from embeddings_generation import FaceEmbedding
from models import FaceNetModel
from pineconevectors import Pinecone

# --- module-level wiring (runs on import) --------------------------------
data_extractor = DataExtractor('./data/Json', './data/Images')
combined_data, paths = data_extractor.concat_data()

model = FaceNetModel()
mtcnn, resnet = model.initialize_model()
transform = model.get_transform()
embeddings = FaceEmbedding(transform, resnet)

# SECURITY: the API key was hard-coded in source. Prefer the environment,
# falling back to the original value so existing deployments keep working.
# TODO: rotate this key and delete the fallback.
pinecone = Pinecone(
    os.environ.get('PINECONE_API_KEY',
                   'c984cd49-42a6-4aa0-b2f2-e96cfb8f59bc'),
    'gcp-starter',
    'facenet',
)
pinecone_index = pinecone.initialize_index()


def _upsert_face(img, box, width, height, record_id, image_path, img_url,
                 page_url, single_face):
    """Embed one detected face and upsert it into Pinecone.

    Face coordinates are stored normalized to [0, 1] so they can be
    re-scaled against the source image's size at query time.
    """
    face_embedding = embeddings.calculate_face_embedding(img, box)
    x1, y1, x2, y2 = (int(coord) for coord in box)
    coordinates = [x1 / width, y1 / height, x2 / width, y2 / height]
    pinecone.upsert_data(record_id, face_embedding, image_path, img_url,
                         page_url, coordinates, single_face)


def process_images():
    """Index every image in ``paths``: detect, embed, and upsert each face."""
    count = 0
    for index, image_path in enumerate(paths):
        try:
            img = Image.open(image_path).convert("RGB")
            width, height = img.size
            boxes, _ = mtcnn.detect(img)
            record_id = combined_data['id'][index]  # avoid shadowing builtin `id`
            img_url = combined_data['Image_URL'][index]
            page_url = combined_data['Page_URL'][index]
            # BUG FIX: mtcnn.detect returns None (not []) when no face is
            # found; the original `len(boxes)` raised TypeError, which was
            # not caught below and aborted the whole indexing run.
            if boxes is None:
                count += 1
                continue
            # One path for both cases: the flag records whether the image
            # held exactly one face (True) or several (False), matching the
            # original duplicated branches.
            single = len(boxes) == 1
            for box in boxes:
                try:
                    _upsert_face(img, box, width, height, record_id,
                                 image_path, img_url, page_url, single)
                except Exception as e:
                    # Best-effort indexing: report and move to the next face.
                    print(e)
                    continue
        except FileNotFoundError:
            print(f"File not found: {image_path}")
        except OSError:
            print(f"Not an image file or image file is corrupted: {image_path}")
        except MemoryError:
            print(f"Out of memory when trying to open image: {image_path}")
        count += 1
    print(count)


def search_images(query_img):
    """Return Pinecone matches for the first face detected in ``query_img``.

    Returns None when no face is detected (mtcnn.detect yields None in
    that case; the original code crashed on ``boxes[0]``).
    """
    boxes, _ = mtcnn.detect(query_img)
    if boxes is None:
        return None
    query_embedding = embeddings.calculate_face_embedding(query_img, boxes[0])
    return pinecone.search_data(query_embedding.tolist())


def get_image():
    """Render the upload widget; return search matches, or None if nothing
    was uploaded (or no face was found)."""
    st.title("Image Upload")
    image_file = st.file_uploader("Upload Image",
                                  type=['png', 'jpeg', 'jpg', 'jfif'])
    if image_file is not None:
        image = Image.open(image_file)
        st.image(image, use_column_width=True)
        return search_images(image)
    return None


def display_image(image):
    """Render an image full-width in the Streamlit app."""
    st.image(image, use_column_width=True)


def process_matches(matches):
    """Display each matched image, blurring non-matching faces.

    For a multi-face source image, re-query Pinecone for all faces of the
    same image id and Gaussian-blur every face whose similarity score is
    below 0.9, so only the matched person stays recognizable.
    """
    for match in matches['matches']:
        path = match['metadata']['directory path']
        if not match['metadata']['Single Face']:
            img_id = match['metadata']['Image id']
            results = pinecone_index.query(
                vector=match['values'],
                top_k=4,
                include_values=False,
                include_metadata=True,
                filter={'Image id': {'$eq': img_id}},
            )
            image = Image.open(path)
            width, height = image.size
            for face in results['matches']:
                if face['score'] < 0.9:
                    # Stored coordinates are normalized; scale back to pixels.
                    norm = [float(v)
                            for v in face['metadata']['Face Coordinates']]
                    coordinates = [norm[0] * width, norm[1] * height,
                                   norm[2] * width, norm[3] * height]
                    x1, y1, x2, y2 = (int(c) for c in coordinates)
                    face_w, face_h = x2 - x1, y2 - y1
                    face_region = np.array(image.crop(tuple(coordinates)))
                    blurred = cv2.GaussianBlur(face_region, (99, 99), 20)
                    blurred_img = Image.fromarray(blurred)
                    # Float crop may yield a size off by one pixel; snap it
                    # to the integer paste box.
                    if blurred_img.size != (face_w, face_h):
                        blurred_img = blurred_img.resize((face_w, face_h))
                    image.paste(blurred_img, (x1, y1))
            display_image(image)
        else:
            display_image(Image.open(path))


if __name__ == "__main__":
    # process_images()  # one-time indexing pass; uncomment to (re)build index
    matches = get_image()
    if matches is not None:
        process_matches(matches)