Redmind committed on
Commit
313edde
·
verified ·
1 Parent(s): c2710ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -90,22 +90,28 @@ def extract_images_from_pptx(pptx_path):
90
  def get_text_embedding(text):
91
  return text_model.encode(text).tolist()
92
 
93
- # Extract Image Embeddings and Reduce to 384 Dimensions
 
 
94
  def get_image_embedding(image_path):
95
  try:
 
96
  image = Image.open(image_path)
97
  inputs = processor(images=image, return_tensors="pt")
 
 
98
  with torch.no_grad():
99
  image_embedding = model.get_image_features(**inputs).numpy().flatten()
100
 
101
- # Ensure embedding is 384-dimensional
102
  if len(image_embedding) != 384:
103
- pca = PCA(n_components=384)
104
  image_embedding = pca.fit_transform(image_embedding.reshape(1, -1)).flatten()
105
 
106
  return image_embedding.tolist()
 
107
  except Exception as e:
108
- print(f"Error generating image embedding: {e}")
109
  return None
110
 
111
  # Store Data in ChromaDB
 
90
  def get_text_embedding(text):
91
  return text_model.encode(text).tolist()
92
 
93
+ # Preload PCA instance globally (to maintain consistency across calls)
94
+ pca = PCA(n_components=384)
95
+
96
  def get_image_embedding(image_path):
97
  try:
98
+ # Load the image
99
  image = Image.open(image_path)
100
  inputs = processor(images=image, return_tensors="pt")
101
+
102
+ # Extract image embeddings
103
  with torch.no_grad():
104
  image_embedding = model.get_image_features(**inputs).numpy().flatten()
105
 
106
+ # Check if the embedding dimension is already 384
107
  if len(image_embedding) != 384:
108
+ # Ensure PCA transformation gets the correct shape
109
  image_embedding = pca.fit_transform(image_embedding.reshape(1, -1)).flatten()
110
 
111
  return image_embedding.tolist()
112
+
113
  except Exception as e:
114
+ print(f"Error generating image embedding: {e}")
115
  return None
116
 
117
  # Store Data in ChromaDB