cbir-image-similarity

Runtime error

Samuel Schmidt committed on Mar 8, 2023

Commit

8e7c132

1 Parent(s): e64c54e

Update src/app.py

Files changed (1) hide show

src/app.py CHANGED Viewed

@@ -11,19 +11,16 @@ dataset = load_dataset("huggan/CelebA-faces")
 candidate_subset = dataset["train"].select(range(10)) # This is a small CBIR app! :D
 def index_dataset(dataset):
-    # This function might need to be split up, to reduce start-up time of app
-    # It could also use batches to increase speed
-    # If indexes are saved in files, this is all not really necessary
-    ## Color Embeddings
     cd = ColorDescriptor((8, 12, 3))
-    dataset_with_embeddings = dataset.map(lambda row: {'color_embeddings': cd.describe(row["image"])}) # we assume that dataset has a column 'image'
-    ## CLIP Embeddings
     clip_model = CLIPImageEncoder()
     dataset_with_embeddings = dataset_with_embeddings.map(clip_model.encode_images, batched=True, batch_size=16)
-    ## LBP Embeddings
     lbp_model = LBPImageEncoder(8,2)
     dataset_with_embeddings = dataset_with_embeddings.map(lambda row: {'lbp_embeddings': lbp_model.preprocess_img(row["image"])})
@@ -52,7 +49,6 @@ def check_index(ds):
 dataset_with_embeddings = check_index(candidate_subset)
 # Main function, to find similar images
-# TODO: allow different descriptor/embedding functions
 # TODO: implement different distance measures
 def get_neighbors(query_image, selected_descriptor, top_k=5):

 candidate_subset = dataset["train"].select(range(10)) # This is a small CBIR app! :D
 def index_dataset(dataset):
+    print("Color Embeddings")
     cd = ColorDescriptor((8, 12, 3))
+    dataset_with_embeddings = dataset.map(lambda row: {'color_embeddings': cd.describe(row["image"])})
+    print("CLIP Embeddings")
     clip_model = CLIPImageEncoder()
     dataset_with_embeddings = dataset_with_embeddings.map(clip_model.encode_images, batched=True, batch_size=16)
+    print("LBP Embeddings")
     lbp_model = LBPImageEncoder(8,2)
     dataset_with_embeddings = dataset_with_embeddings.map(lambda row: {'lbp_embeddings': lbp_model.preprocess_img(row["image"])})
 dataset_with_embeddings = check_index(candidate_subset)
 # Main function, to find similar images
 # TODO: implement different distance measures
 def get_neighbors(query_image, selected_descriptor, top_k=5):