Samuel Schmidt committed on
Commit
8e7c132
·
1 Parent(s): e64c54e

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +4 -8
src/app.py CHANGED
@@ -11,19 +11,16 @@ dataset = load_dataset("huggan/CelebA-faces")
11
  candidate_subset = dataset["train"].select(range(10)) # This is a small CBIR app! :D
12
 
13
  def index_dataset(dataset):
14
- # This function might need to be split up, to reduce start-up time of app
15
- # It could also use batches to increase speed
16
- # If indexes are saved in files, this is all not really necessary
17
 
18
- ## Color Embeddings
19
  cd = ColorDescriptor((8, 12, 3))
20
- dataset_with_embeddings = dataset.map(lambda row: {'color_embeddings': cd.describe(row["image"])}) # we assume that dataset has a column 'image'
21
 
22
- ## CLIP Embeddings
23
  clip_model = CLIPImageEncoder()
24
  dataset_with_embeddings = dataset_with_embeddings.map(clip_model.encode_images, batched=True, batch_size=16)
25
 
26
- ## LBP Embeddings
27
  lbp_model = LBPImageEncoder(8,2)
28
  dataset_with_embeddings = dataset_with_embeddings.map(lambda row: {'lbp_embeddings': lbp_model.preprocess_img(row["image"])})
29
 
@@ -52,7 +49,6 @@ def check_index(ds):
52
  dataset_with_embeddings = check_index(candidate_subset)
53
 
54
  # Main function, to find similar images
55
- # TODO: allow different descriptor/embedding functions
56
  # TODO: implement different distance measures
57
 
58
  def get_neighbors(query_image, selected_descriptor, top_k=5):
 
11
  candidate_subset = dataset["train"].select(range(10)) # This is a small CBIR app! :D
12
 
13
  def index_dataset(dataset):
 
 
 
14
 
15
+ print("Color Embeddings")
16
  cd = ColorDescriptor((8, 12, 3))
17
+ dataset_with_embeddings = dataset.map(lambda row: {'color_embeddings': cd.describe(row["image"])})
18
 
19
+ print("CLIP Embeddings")
20
  clip_model = CLIPImageEncoder()
21
  dataset_with_embeddings = dataset_with_embeddings.map(clip_model.encode_images, batched=True, batch_size=16)
22
 
23
+ print("LBP Embeddings")
24
  lbp_model = LBPImageEncoder(8,2)
25
  dataset_with_embeddings = dataset_with_embeddings.map(lambda row: {'lbp_embeddings': lbp_model.preprocess_img(row["image"])})
26
 
 
49
  dataset_with_embeddings = check_index(candidate_subset)
50
 
51
  # Main function, to find similar images
 
52
  # TODO: implement different distance measures
53
 
54
  def get_neighbors(query_image, selected_descriptor, top_k=5):