Spaces:
Build error
Build error
Wisdom Chen
committed on
Update model.py
Browse files
model.py
CHANGED
@@ -104,6 +104,45 @@ def initialize_models() -> bool:
|
|
104 |
|
105 |
except Exception as e:
|
106 |
raise RuntimeError(f"Model initialization failed: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
# Data loading
|
109 |
def load_data() -> bool:
|
@@ -262,45 +301,6 @@ def load_data() -> bool:
|
|
262 |
image_faiss = None
|
263 |
raise RuntimeError(f"Data loading failed: {str(e)}")
|
264 |
|
265 |
-
def load_embeddings_from_huggingface(repo_id: str) -> Tuple[Dict, Dict]:
|
266 |
-
"""
|
267 |
-
Load embeddings from Hugging Face repository with enhanced error handling.
|
268 |
-
|
269 |
-
Args:
|
270 |
-
repo_id (str): Hugging Face repository ID
|
271 |
-
|
272 |
-
Returns:
|
273 |
-
Tuple[Dict, Dict]: Dictionaries containing text and image embeddings
|
274 |
-
"""
|
275 |
-
print("Loading embeddings from Hugging Face...")
|
276 |
-
try:
|
277 |
-
file_path = hf_hub_download(
|
278 |
-
repo_id=repo_id,
|
279 |
-
filename="embeddings.parquet",
|
280 |
-
repo_type="dataset"
|
281 |
-
)
|
282 |
-
df = pd.read_parquet(file_path)
|
283 |
-
|
284 |
-
# Extract embedding columns
|
285 |
-
text_cols = [col for col in df.columns if col.startswith('text_embedding_')]
|
286 |
-
image_cols = [col for col in df.columns if col.startswith('image_embedding_')]
|
287 |
-
|
288 |
-
# Create embedding dictionaries
|
289 |
-
text_embeddings_dict = {
|
290 |
-
row['Uniq_Id']: row[text_cols].values.astype(np.float32)
|
291 |
-
for _, row in df.iterrows()
|
292 |
-
}
|
293 |
-
image_embeddings_dict = {
|
294 |
-
row['Uniq_Id']: row[image_cols].values.astype(np.float32)
|
295 |
-
for _, row in df.iterrows()
|
296 |
-
}
|
297 |
-
|
298 |
-
print(f"Successfully loaded {len(text_embeddings_dict)} embeddings")
|
299 |
-
return text_embeddings_dict, image_embeddings_dict
|
300 |
-
|
301 |
-
except Exception as e:
|
302 |
-
raise RuntimeError(f"Failed to load embeddings from Hugging Face: {str(e)}")
|
303 |
-
|
304 |
# FAISS index creation
|
305 |
class MultiModalFAISSIndex:
|
306 |
def __init__(self, dimension, index_type='L2'):
|
|
|
104 |
|
105 |
except Exception as e:
|
106 |
raise RuntimeError(f"Model initialization failed: {str(e)}")
|
107 |
+
|
108 |
+
def load_embeddings_from_huggingface(repo_id: str) -> Tuple[Dict, Dict]:
|
109 |
+
"""
|
110 |
+
Load embeddings from Hugging Face repository with enhanced error handling.
|
111 |
+
|
112 |
+
Args:
|
113 |
+
repo_id (str): Hugging Face repository ID
|
114 |
+
|
115 |
+
Returns:
|
116 |
+
Tuple[Dict, Dict]: Dictionaries containing text and image embeddings
|
117 |
+
"""
|
118 |
+
print("Loading embeddings from Hugging Face...")
|
119 |
+
try:
|
120 |
+
file_path = hf_hub_download(
|
121 |
+
repo_id=repo_id,
|
122 |
+
filename="embeddings.parquet",
|
123 |
+
repo_type="dataset"
|
124 |
+
)
|
125 |
+
df = pd.read_parquet(file_path)
|
126 |
+
|
127 |
+
# Extract embedding columns
|
128 |
+
text_cols = [col for col in df.columns if col.startswith('text_embedding_')]
|
129 |
+
image_cols = [col for col in df.columns if col.startswith('image_embedding_')]
|
130 |
+
|
131 |
+
# Create embedding dictionaries
|
132 |
+
text_embeddings_dict = {
|
133 |
+
row['Uniq_Id']: row[text_cols].values.astype(np.float32)
|
134 |
+
for _, row in df.iterrows()
|
135 |
+
}
|
136 |
+
image_embeddings_dict = {
|
137 |
+
row['Uniq_Id']: row[image_cols].values.astype(np.float32)
|
138 |
+
for _, row in df.iterrows()
|
139 |
+
}
|
140 |
+
|
141 |
+
print(f"Successfully loaded {len(text_embeddings_dict)} embeddings")
|
142 |
+
return text_embeddings_dict, image_embeddings_dict
|
143 |
+
|
144 |
+
except Exception as e:
|
145 |
+
raise RuntimeError(f"Failed to load embeddings from Hugging Face: {str(e)}")
|
146 |
|
147 |
# Data loading
|
148 |
def load_data() -> bool:
|
|
|
301 |
image_faiss = None
|
302 |
raise RuntimeError(f"Data loading failed: {str(e)}")
|
303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
# FAISS index creation
|
305 |
class MultiModalFAISSIndex:
|
306 |
def __init__(self, dimension, index_type='L2'):
|