Spaces:
Runtime error
Runtime error
import os

import cv2
import numpy as np
import streamlit as st
from PIL import Image

from data_extractor import DataExtractor
from embeddings_generation import FaceEmbedding
from models import FaceNetModel
from pineconevectors import Pinecone
# --- Module-level setup ------------------------------------------------------
# These statements run at import time (and on every Streamlit rerun of the
# script); the functions below read the resulting globals.

# Metadata table and the list of image paths. ``combined_data`` is indexed as
# combined_data[column][row] with the columns 'id', 'Image_URL' and 'Page_URL'
# (see process_images); row i is assumed to describe paths[i].
data_extractor = DataExtractor('./data/Json', './data/Images')
combined_data, paths = data_extractor.concat_data()

# Face detector (mtcnn), embedding network (resnet) and the preprocessing
# transform that FaceEmbedding applies before calling the network.
model = FaceNetModel()
mtcnn, resnet = model.initialize_model()
transform = model.get_transform()
embeddings = FaceEmbedding(transform, resnet)

# SECURITY: the API key used to be hard-coded only. It is now taken from the
# PINECONE_API_KEY environment variable when set; the literal is kept solely
# as a backward-compatible fallback. Because it has been committed to source,
# it should be rotated and the fallback removed.
_pinecone_api_key = os.environ.get(
    'PINECONE_API_KEY', 'c984cd49-42a6-4aa0-b2f2-e96cfb8f59bc'
)
# Remaining arguments are presumably the Pinecone environment and index name
# -- confirm against pineconevectors.Pinecone.
pinecone = Pinecone(_pinecone_api_key, 'gcp-starter', 'facenet')
pinecone_index = pinecone.initialize_index()
def process_images():
    """Index every face found in ``paths`` into Pinecone.

    For each image the detector is run, an embedding is computed for every
    detected face, and the embedding is upserted together with the image
    id, path, URLs and the face box normalised to the image size. The last
    argument passed to ``upsert_data`` is True when the image contains
    exactly one face and False otherwise.

    Images that cannot be opened are reported and skipped. Images in which
    no face is detected are skipped silently. A failure while embedding or
    upserting one face is printed and does not stop the remaining faces.

    A running count of visited images is printed after each image.
    NOTE(review): the original indentation was lost; the per-image
    placement of the counter and its print is an assumption -- confirm.
    """
    count = 0
    for index, image_path in enumerate(paths):
        try:
            # Close the file handle as soon as the pixel data is copied.
            with Image.open(image_path) as raw_img:
                img = raw_img.convert("RGB")
            width, height = img.size
            boxes, _ = mtcnn.detect(img)
            image_id = combined_data['id'][index]
            img_url = combined_data['Image_URL'][index]
            page_url = combined_data['Page_URL'][index]
            # The detector may hand back None instead of an empty array
            # when it finds no face (facenet-pytorch behaviour, assumed
            # from the mtcnn/resnet naming); len(None) would otherwise
            # raise a TypeError that none of the handlers below catch and
            # abort the whole run.
            if boxes is not None and len(boxes) > 0:
                single_face = len(boxes) == 1
                for box in boxes:
                    try:
                        face_embedding = embeddings.calculate_face_embedding(img, box)
                        x1, y1, x2, y2 = [int(coord) for coord in box]
                        # Store the box relative to the image size so it
                        # can be mapped back onto the image later.
                        coordinates = [x1 / width, y1 / height, x2 / width, y2 / height]
                        pinecone.upsert_data(
                            image_id, face_embedding, image_path,
                            img_url, page_url, coordinates, single_face,
                        )
                    except Exception as e:
                        # Best effort: one bad face must not stop indexing.
                        print(e)
        except FileNotFoundError:
            print(f"File not found: {image_path}")
        except OSError:
            print(f"Not an image file or image file is corrupted: {image_path}")
        except MemoryError:
            print(f"Out of memory when trying to open image: {image_path}")
        count += 1
        print(count)
def search_images(query_img):
    """Search the Pinecone index for faces similar to the one in *query_img*.

    Args:
        query_img: The query picture; the visible caller passes a PIL image.

    Returns:
        Whatever ``pinecone.search_data`` returns for the embedding of the
        first detected face, or ``None`` when no face is detected.
    """
    # Indexed images are converted to RGB before detection; do the same for
    # the query so RGBA / palette uploads (common for PNG) take the same
    # path. Objects without a ``mode`` attribute are passed through as-is.
    if getattr(query_img, "mode", "RGB") != "RGB":
        query_img = query_img.convert("RGB")

    boxes, _ = mtcnn.detect(query_img)
    # The detector may return None when it finds no face; indexing into it
    # would raise a TypeError.
    if boxes is None or len(boxes) == 0:
        return None

    # Only the first detected face is used as the query.
    query_embedding = embeddings.calculate_face_embedding(query_img, boxes[0])
    return pinecone.search_data(query_embedding.tolist())
def get_image():
    """Render the upload widget and search for the uploaded face.

    Returns:
        The result of ``search_images`` for the uploaded picture, or
        ``None`` while nothing has been uploaded.
    """
    st.title("Image Upload")
    uploaded = st.file_uploader("Upload Image", type=['png', 'jpeg', 'jpg', 'jfif'])
    if uploaded is None:
        return None

    query_image = Image.open(uploaded)
    # Echo the uploaded picture back to the user before showing matches.
    st.image(query_image, use_column_width=True)
    return search_images(query_image)
def display_image(image):
    """Show *image* in the Streamlit page, scaled to the column width."""
    st.image(image=image, use_column_width=True)
def _blur_face(image, box):
    """Gaussian-blur the pixel rectangle *box* = (x1, y1, x2, y2) of *image* in place.

    The box is clamped to the image bounds; a box that is empty after
    clamping is ignored.
    """
    width, height = image.size
    x1, y1, x2, y2 = box
    x1, x2 = max(0, min(x1, width)), max(0, min(x2, width))
    y1, y2 = max(0, min(y1, height)), max(0, min(y2, height))
    if x2 <= x1 or y2 <= y1:
        # Nothing to blur; an empty region would make GaussianBlur fail.
        return
    # Crop and paste with the same integer box so the blurred patch has
    # exactly the size of the region it replaces.
    face_region = np.array(image.crop((x1, y1, x2, y2)))
    blurred_face_region = cv2.GaussianBlur(face_region, (99, 99), 20)
    image.paste(Image.fromarray(blurred_face_region), (x1, y1))


def process_matches(matches):
    """Display every matched image, blurring the other faces in group photos.

    Args:
        matches: Search result with a ``'matches'`` list. Each entry
            carries ``'values'`` (the stored vector) and ``'metadata'``
            with the keys ``'Single Face'``, ``'Image id'`` and
            ``'directory path'``.

    For a match whose ``'Single Face'`` flag is truthy the stored image is
    shown unchanged. Otherwise the index is queried again with the matched
    vector, restricted to the same ``'Image id'``, and every returned face
    whose score is below 0.9 is blurred before the image is shown.

    NOTE: the follow-up query uses ``top_k = 4``, so at most four faces of
    an image are retrieved; further faces are never blurred.
    NOTE(review): the original indentation was lost; showing the image
    once, after all of its faces were processed, is an assumption.
    """
    for match in matches['matches']:
        metadata = match['metadata']
        if metadata['Single Face']:
            display_image(Image.open(metadata['directory path']))
            continue

        img_id = metadata['Image id']
        results = pinecone_index.query(
            vector=match['values'],
            top_k=4,
            include_values=False,
            include_metadata=True,
            filter={'Image id': {'$eq': img_id}},
        )
        # Convert to RGB, as is done when indexing, so the numpy/cv2 blur
        # operates on real colour channels rather than e.g. palette
        # indices.
        image = Image.open(metadata['directory path']).convert("RGB")
        width, height = image.size
        for face in results['matches']:
            # Faces scoring 0.9 or higher against the matched vector are
            # left visible (presumably the matched person).
            if face['score'] >= 0.9:
                continue
            # Coordinates are stored as fractions of the image size.
            nx1, ny1, nx2, ny2 = [float(item) for item in face['metadata']['Face Coordinates']]
            box = (int(nx1 * width), int(ny1 * height), int(nx2 * width), int(ny2 * height))
            _blur_face(image, box)
        display_image(image)
if __name__ == "__main__":
    # One-off indexing step, kept disabled for normal app runs:
    # process_images()
    search_results = get_image()
    # get_image() returns None while no picture has been uploaded.
    if search_results is not None:
        process_matches(search_results)