from PIL import Image, ImageDraw
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util

# Zero-shot object detector (OWL-ViT) for locating faces, and a CLIP model
# for embedding the cropped faces.
checkpoint = "google/owlvit-base-patch32"
detector = pipeline(model=checkpoint, task="zero-shot-object-detection")
model = SentenceTransformer('clip-ViT-L-14')
def get_face_image(im1):
    # Detect faces in the image with the zero-shot detector
    predictions = detector(
        im1,
        candidate_labels=["human face"],
    )

    # Keep the highest-scoring "human face" box (assumes at least one face is found)
    max_score = 0
    box_area = None
    for prediction in predictions:
        box = prediction["box"]
        label = prediction["label"]
        score = prediction["score"]
        if score > max_score:
            xmin, ymin, xmax, ymax = box.values()
            box_area = (xmin, ymin, xmax, ymax)
            max_score = score

    # Draw the bounding box, then crop and resize the face to 256x256
    draw = ImageDraw.Draw(im1)
    draw.rectangle(box_area, outline="red", width=1)
    # draw.text((xmin, ymin), f"{label}: {round(score, 2)}", fill="blue")
    crop_img1 = im1.crop(box_area)
    newsize = (256, 256)
    face_img1 = crop_img1.resize(newsize)
    return face_img1
def predict(im1, im2, inp_sim):
    # Crop the face out of each input image
    face_image1 = get_face_image(im1)
    face_image2 = get_face_image(im2)

    # Embed both faces with CLIP and compare them with cosine similarity
    img_emb = model.encode([face_image1, face_image2])
    sim = util.cos_sim(img_emb[0], img_emb[1]).item()  # 1x1 tensor -> plain float

    # Unlock only if the similarity clears the user-supplied threshold
    if sim > inp_sim:
        return sim, "SAME PERSON, UNLOCK PHONE"
    else:
        return sim, "DIFFERENT PEOPLE, DON'T UNLOCK"