Redmind committed
Commit 35c07dc · verified · 1 parent: d188171

Update app.py

Files changed (1): app.py (+23 −4)
app.py CHANGED
@@ -89,14 +89,33 @@ def extract_images_from_pptx(pptx_path):
 def get_text_embedding(text):
     return text_model.encode(text).tolist()
 
+from transformers import CLIPProcessor, CLIPModel
+import torch
+import numpy as np
+from sklearn.decomposition import PCA
+
+# ✅ Load CLIP (512-dimensional output)
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
-### Step 6: Convert Images to Embeddings ###
 def get_image_embedding(image_path):
+    """Extracts image embedding and reduces to 384 dimensions"""
+    from PIL import Image
+
     image = Image.open(image_path)
-    inputs = clip_processor(images=image, return_tensors="pt")
+    inputs = processor(images=image, return_tensors="pt")
+
     with torch.no_grad():
-        embedding = clip_model.get_image_features(**inputs)
-    return embedding.squeeze().tolist()
+        image_embedding = model.get_image_features(**inputs)  # Shape: (1, 512)
+
+    image_embedding = image_embedding.numpy().flatten()  # Convert to NumPy (512,)
+
+    # ✅ Reduce to 384 dimensions using PCA
+    pca = PCA(n_components=384)
+    image_embedding_384 = pca.fit_transform(image_embedding.reshape(1, -1))
+
+    return image_embedding_384.flatten().tolist()
+
 
 
 ### Step 7: Store Data in ChromaDB ###
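A caveat on the new reduction step: scikit-learn's PCA requires n_components <= min(n_samples, n_features), so PCA(n_components=384).fit_transform() on a single reshaped (1, 512) vector raises a ValueError, and even if it ran, a PCA fitted fresh for each image would not place different images in a shared 384-dimensional space (presumably the point of matching the 384-d text embeddings in one ChromaDB collection). Below is a minimal sketch of one workable pattern, assuming the app can fit the reducer once on a corpus of at least 384 slide images; embed_images and fit_reducer are illustrative helper names, not part of the committed app.py.

import torch
from PIL import Image
from sklearn.decomposition import PCA
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def embed_images(image_paths):
    """Return an (n_images, 512) array of CLIP image embeddings."""
    images = [Image.open(p).convert("RGB") for p in image_paths]
    inputs = processor(images=images, return_tensors="pt")
    with torch.no_grad():
        features = model.get_image_features(**inputs)  # (n_images, 512)
    return features.numpy()

def fit_reducer(corpus_image_paths, n_components=384):
    """Fit PCA once on a corpus; needs at least n_components images."""
    embeddings = embed_images(corpus_image_paths)
    return PCA(n_components=n_components).fit(embeddings)

def get_image_embedding(image_path, pca):
    """384-d embedding for one image, via the pre-fitted reducer."""
    embedding = embed_images([image_path])  # (1, 512)
    return pca.transform(embedding).flatten().tolist()

If fewer than 384 images are available, sklearn.random_projection.GaussianRandomProjection(n_components=384, random_state=0) is a data-independent alternative: its fit() only records the input width, so it can be fitted on a single embedding and the same seeded projection reused for every image.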