Spaces:
Sleeping
Sleeping
Commit
·
db03f5d
1
Parent(s):
1272949
Refactor generate_embedding function to remove unneeded variables
Browse files
app.py
CHANGED
@@ -43,7 +43,7 @@ def generate_embedding(text_data, image_data):
|
|
43 |
|
44 |
# Embed text data
|
45 |
text_embeddings = []
|
46 |
-
|
47 |
if text_data:
|
48 |
# If text_data is a string, convert to list of strings
|
49 |
if isinstance(text_data, str):
|
@@ -54,7 +54,7 @@ def generate_embedding(text_data, image_data):
|
|
54 |
text_data = list(text_data)
|
55 |
|
56 |
# Keep track of indices of empty text strings
|
57 |
-
|
58 |
|
59 |
# Remove empty text strings
|
60 |
text_data = [text for text in text_data if text != ""]
|
@@ -71,12 +71,12 @@ def generate_embedding(text_data, image_data):
|
|
71 |
text_embeddings = [embedding.detach().cpu().numpy().tolist() for embedding in text_embeddings]
|
72 |
|
73 |
# Insert empty strings at indices of empty text strings
|
74 |
-
for i in
|
75 |
text_embeddings.insert(i, "")
|
76 |
|
77 |
# Embed image data
|
78 |
image_embeddings = []
|
79 |
-
|
80 |
if image_data:
|
81 |
# If image_data is a single PIL image, convert to list of PIL images
|
82 |
if isinstance(image_data, PIL.Image.Image):
|
@@ -87,7 +87,7 @@ def generate_embedding(text_data, image_data):
|
|
87 |
image_data = list(image_data)
|
88 |
|
89 |
# Keep track of indices of None images
|
90 |
-
|
91 |
|
92 |
# Remove None images
|
93 |
image_data = [img for img in image_data if img is not None]
|
@@ -105,12 +105,12 @@ def generate_embedding(text_data, image_data):
|
|
105 |
image_embeddings = [embedding.detach().cpu().numpy().tolist() for embedding in image_embeddings]
|
106 |
|
107 |
# Insert empty strings at indices of empty images
|
108 |
-
for i in
|
109 |
image_embeddings.insert(i, "")
|
110 |
|
111 |
# Calculate cosine similarity between text and image embeddings
|
112 |
similarity = []
|
113 |
-
|
114 |
if text_embeddings and image_embeddings:
|
115 |
# Filter out embedding pairs with either empty text or image embeddings, tracking indices of empty embeddings
|
116 |
text_embeddings_filtered = []
|
@@ -120,7 +120,7 @@ def generate_embedding(text_data, image_data):
|
|
120 |
text_embeddings_filtered.append(text_embedding)
|
121 |
image_embeddings_filtered.append(image_embedding)
|
122 |
else:
|
123 |
-
|
124 |
|
125 |
# Calculate cosine similarity if there are any non-empty embedding pairs
|
126 |
if image_embeddings_filtered and text_embeddings_filtered:
|
@@ -138,7 +138,7 @@ def generate_embedding(text_data, image_data):
|
|
138 |
similarity = [f"{sim.item() * 100:.2f}%" for sim in similarity]
|
139 |
|
140 |
# Insert empty strings into similarity at the indices of pairs whose text or image embedding was empty
|
141 |
-
for i in
|
142 |
similarity.insert(i, "")
|
143 |
|
144 |
return (text_embeddings, image_embeddings, similarity)
|
|
|
43 |
|
44 |
# Embed text data
|
45 |
text_embeddings = []
|
46 |
+
empty_data_indices = []
|
47 |
if text_data:
|
48 |
# If text_data is a string, convert to list of strings
|
49 |
if isinstance(text_data, str):
|
|
|
54 |
text_data = list(text_data)
|
55 |
|
56 |
# Keep track of indices of empty text strings
|
57 |
+
empty_data_indices = [i for i, text in enumerate(text_data) if text == ""]
|
58 |
|
59 |
# Remove empty text strings
|
60 |
text_data = [text for text in text_data if text != ""]
|
|
|
71 |
text_embeddings = [embedding.detach().cpu().numpy().tolist() for embedding in text_embeddings]
|
72 |
|
73 |
# Insert empty strings at indices of empty text strings
|
74 |
+
for i in empty_data_indices:
|
75 |
text_embeddings.insert(i, "")
|
76 |
|
77 |
# Embed image data
|
78 |
image_embeddings = []
|
79 |
+
empty_data_indices = []
|
80 |
if image_data:
|
81 |
# If image_data is a single PIL image, convert to list of PIL images
|
82 |
if isinstance(image_data, PIL.Image.Image):
|
|
|
87 |
image_data = list(image_data)
|
88 |
|
89 |
# Keep track of indices of None images
|
90 |
+
empty_data_indices = [i for i, img in enumerate(image_data) if img is None]
|
91 |
|
92 |
# Remove None images
|
93 |
image_data = [img for img in image_data if img is not None]
|
|
|
105 |
image_embeddings = [embedding.detach().cpu().numpy().tolist() for embedding in image_embeddings]
|
106 |
|
107 |
# Insert empty strings at indices of empty images
|
108 |
+
for i in empty_data_indices:
|
109 |
image_embeddings.insert(i, "")
|
110 |
|
111 |
# Calculate cosine similarity between text and image embeddings
|
112 |
similarity = []
|
113 |
+
empty_data_indices = []
|
114 |
if text_embeddings and image_embeddings:
|
115 |
# Filter out embedding pairs with either empty text or image embeddings, tracking indices of empty embeddings
|
116 |
text_embeddings_filtered = []
|
|
|
120 |
text_embeddings_filtered.append(text_embedding)
|
121 |
image_embeddings_filtered.append(image_embedding)
|
122 |
else:
|
123 |
+
empty_data_indices.append(i)
|
124 |
|
125 |
# Calculate cosine similarity if there are any non-empty embedding pairs
|
126 |
if image_embeddings_filtered and text_embeddings_filtered:
|
|
|
138 |
similarity = [f"{sim.item() * 100:.2f}%" for sim in similarity]
|
139 |
|
140 |
# Insert empty strings into similarity at the indices of pairs whose text or image embedding was empty
|
141 |
+
for i in empty_data_indices:
|
142 |
similarity.insert(i, "")
|
143 |
|
144 |
return (text_embeddings, image_embeddings, similarity)
|