msabia committed on
Commit
31b5567
·
verified ·
1 Parent(s): 076c6ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -18
app.py CHANGED
@@ -8,8 +8,11 @@ import io
8
  import pickle
9
  import random
10
 
11
- def get_image(file, dataset_image_mask, processid_to_index, idx):
12
- image_enc_padded = file[idx].astype(np.uint8)
 
 
 
13
  enc_length = dataset_image_mask[idx]
14
  image_enc = image_enc_padded[:enc_length]
15
  image = Image.open(io.BytesIO(image_enc))
@@ -42,17 +45,17 @@ def searchEmbeddings(id, mod1, mod2):
42
  id_list.append(id)
43
 
44
  # get images
45
- image0 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, original_indx)
46
- image1 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, I[0][0])
47
- image2 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, I[0][1])
48
- image3 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, I[0][2])
49
- image4 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, I[0][3])
50
- image5 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, I[0][4])
51
- image6 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, I[0][5])
52
- image7 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, I[0][6])
53
- image8 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, I[0][7])
54
- image9 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, I[0][8])
55
- image10 = get_image(dataset_hdf5_all_key['image'], dataset_image_mask, processid_to_index, I[0][9])
56
 
57
  # get taxonomic information
58
  s0 = getTax(original_indx)
@@ -82,7 +85,10 @@ def getTax(indx):
82
 
83
  with gr.Blocks(title="Bioscan-Clip") as demo:
84
  # open general files
85
- dataset_hdf5_all_key = h5py.File('full5m/BIOSCAN_5M.hdf5', "r", libver="latest")['all_keys']
 
 
 
86
  with open("dataset_processid_list.pickle", "rb") as f:
87
  dataset_processid_list = pickle.load(f)
88
  with open("dataset_image_mask.pickle", "rb") as f:
@@ -101,10 +107,12 @@ with gr.Blocks(title="Bioscan-Clip") as demo:
101
  id_to_dna_emb_dict = pickle.load(f)
102
 
103
  # open taxonomy files
104
- family = [item.decode("utf-8") for item in dataset_hdf5_all_key["family"][:]]
105
- genus = [item.decode("utf-8") for item in dataset_hdf5_all_key["genus"][:]]
106
- species = [item.decode("utf-8") for item in dataset_hdf5_all_key["species"][:]]
107
-
 
 
108
  with gr.Column():
109
  process_id = gr.Textbox(label="ID:", info="Enter a sample ID to search for")
110
  process_id_list = gr.Textbox(label="Closest 10 matches:" )
 
8
  import pickle
9
  import random
10
 
11
+ def get_image(image1, image2, dataset_image_mask, processid_to_index, idx):
12
+ if (idx < 162834):
13
+ image_enc_padded = image1[idx].astype(np.uint8)
14
+ elif(idx >= 162834):
15
+ image_enc_padded = image2[idx-162834].astype(np.uint8)
16
  enc_length = dataset_image_mask[idx]
17
  image_enc = image_enc_padded[:enc_length]
18
  image = Image.open(io.BytesIO(image_enc))
 
45
  id_list.append(id)
46
 
47
  # get images
48
+ image0 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, original_indx)
49
+ image1 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][0])
50
+ image2 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][1])
51
+ image3 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][2])
52
+ image4 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][3])
53
+ image5 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][4])
54
+ image6 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][5])
55
+ image7 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][6])
56
+ image8 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][7])
57
+ image9 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][8])
58
+ image10 = get_image(dataset_image1, dataset_image2, dataset_image_mask, processid_to_index, I[0][9])
59
 
60
  # get taxonomic information
61
  s0 = getTax(original_indx)
 
85
 
86
  with gr.Blocks(title="Bioscan-Clip") as demo:
87
  # open general files
88
+ with open("dataset_image1.pickle", "rb") as f:
89
+ dataset_image1 = pickle.load(f)
90
+ with open("dataset_image2.pickle", "rb") as f:
91
+ dataset_image2 = pickle.load(f)
92
  with open("dataset_processid_list.pickle", "rb") as f:
93
  dataset_processid_list = pickle.load(f)
94
  with open("dataset_image_mask.pickle", "rb") as f:
 
107
  id_to_dna_emb_dict = pickle.load(f)
108
 
109
  # open taxonomy files
110
+ with open("tax/family.pickle", "rb") as f:
111
+ family = [item.decode("utf-8") for item in pickle.load(f)]
112
+ with open("tax/genus.pickle", "rb") as f:
113
+ genus= [item.decode("utf-8") for item in pickle.load(f)]
114
+ with open("tax/species.pickle", "rb") as f:
115
+ species = [item.decode("utf-8") for item in pickle.load(f)]
116
  with gr.Column():
117
  process_id = gr.Textbox(label="ID:", info="Enter a sample ID to search for")
118
  process_id_list = gr.Textbox(label="Closest 10 matches:" )