Wisdom Chen committed on
Commit
df25634
·
unverified ·
1 Parent(s): 42aacb0

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +39 -39
model.py CHANGED
@@ -104,6 +104,45 @@ def initialize_models() -> bool:
104
 
105
  except Exception as e:
106
  raise RuntimeError(f"Model initialization failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  # Data loading
109
  def load_data() -> bool:
@@ -262,45 +301,6 @@ def load_data() -> bool:
262
  image_faiss = None
263
  raise RuntimeError(f"Data loading failed: {str(e)}")
264
 
265
- def load_embeddings_from_huggingface(repo_id: str) -> Tuple[Dict, Dict]:
266
- """
267
- Load embeddings from Hugging Face repository with enhanced error handling.
268
-
269
- Args:
270
- repo_id (str): Hugging Face repository ID
271
-
272
- Returns:
273
- Tuple[Dict, Dict]: Dictionaries containing text and image embeddings
274
- """
275
- print("Loading embeddings from Hugging Face...")
276
- try:
277
- file_path = hf_hub_download(
278
- repo_id=repo_id,
279
- filename="embeddings.parquet",
280
- repo_type="dataset"
281
- )
282
- df = pd.read_parquet(file_path)
283
-
284
- # Extract embedding columns
285
- text_cols = [col for col in df.columns if col.startswith('text_embedding_')]
286
- image_cols = [col for col in df.columns if col.startswith('image_embedding_')]
287
-
288
- # Create embedding dictionaries
289
- text_embeddings_dict = {
290
- row['Uniq_Id']: row[text_cols].values.astype(np.float32)
291
- for _, row in df.iterrows()
292
- }
293
- image_embeddings_dict = {
294
- row['Uniq_Id']: row[image_cols].values.astype(np.float32)
295
- for _, row in df.iterrows()
296
- }
297
-
298
- print(f"Successfully loaded {len(text_embeddings_dict)} embeddings")
299
- return text_embeddings_dict, image_embeddings_dict
300
-
301
- except Exception as e:
302
- raise RuntimeError(f"Failed to load embeddings from Hugging Face: {str(e)}")
303
-
304
  # FAISS index creation
305
  class MultiModalFAISSIndex:
306
  def __init__(self, dimension, index_type='L2'):
 
104
 
105
  except Exception as e:
106
  raise RuntimeError(f"Model initialization failed: {str(e)}")
107
+
108
+ def load_embeddings_from_huggingface(repo_id: str) -> Tuple[Dict, Dict]:
109
+ """
110
+ Load embeddings from Hugging Face repository with enhanced error handling.
111
+
112
+ Args:
113
+ repo_id (str): Hugging Face repository ID
114
+
115
+ Returns:
116
+ Tuple[Dict, Dict]: Dictionaries containing text and image embeddings
117
+ """
118
+ print("Loading embeddings from Hugging Face...")
119
+ try:
120
+ file_path = hf_hub_download(
121
+ repo_id=repo_id,
122
+ filename="embeddings.parquet",
123
+ repo_type="dataset"
124
+ )
125
+ df = pd.read_parquet(file_path)
126
+
127
+ # Extract embedding columns
128
+ text_cols = [col for col in df.columns if col.startswith('text_embedding_')]
129
+ image_cols = [col for col in df.columns if col.startswith('image_embedding_')]
130
+
131
+ # Create embedding dictionaries
132
+ text_embeddings_dict = {
133
+ row['Uniq_Id']: row[text_cols].values.astype(np.float32)
134
+ for _, row in df.iterrows()
135
+ }
136
+ image_embeddings_dict = {
137
+ row['Uniq_Id']: row[image_cols].values.astype(np.float32)
138
+ for _, row in df.iterrows()
139
+ }
140
+
141
+ print(f"Successfully loaded {len(text_embeddings_dict)} embeddings")
142
+ return text_embeddings_dict, image_embeddings_dict
143
+
144
+ except Exception as e:
145
+ raise RuntimeError(f"Failed to load embeddings from Hugging Face: {str(e)}")
146
 
147
  # Data loading
148
  def load_data() -> bool:
 
301
  image_faiss = None
302
  raise RuntimeError(f"Data loading failed: {str(e)}")
303
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  # FAISS index creation
305
  class MultiModalFAISSIndex:
306
  def __init__(self, dimension, index_type='L2'):