from PIL import Image
import io
from transformers import CLIPProcessor, CLIPModel
import torch

# Load the CLIP model and processor once at module import
model_name = "openai/clip-vit-base-patch32"
loaded_model = CLIPModel.from_pretrained(model_name)
loaded_processor = CLIPProcessor.from_pretrained(model_name)


def getTextEmbedding(text):
    # Tokenize the text for CLIP's text encoder
    inputs_text = loaded_processor(text=[text], return_tensors="pt", padding=True)

    # Forward pass through the text encoder (no gradients needed for inference)
    with torch.no_grad():
        text_features = loaded_model.get_text_features(
            input_ids=inputs_text.input_ids,
            attention_mask=inputs_text.attention_mask,
        )

    # Convert the tensor to a numpy array for easier downstream use
    text_embedding = text_features.squeeze().numpy()
    return text_embedding


def getImageEmbedding(binary_image_data):
    # Load the image from raw bytes and preprocess it for CLIP's vision encoder
    image = Image.open(io.BytesIO(binary_image_data))
    inputs = loaded_processor(images=image, return_tensors="pt", padding=True)

    # Forward pass through the vision encoder (no gradients needed for inference)
    with torch.no_grad():
        image_features = loaded_model.get_image_features(pixel_values=inputs.pixel_values)

    # Convert the tensor to a numpy array for easier downstream use
    image_embedding = image_features.squeeze().numpy()
    return image_embedding
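

# Example usage: an illustrative sketch, not part of the original file.
# "cat.jpg" is a hypothetical image path used only for demonstration; any
# image file read as bytes works. Because both embeddings live in CLIP's
# shared space, cosine similarity measures how well the text matches the image.
if __name__ == "__main__":
    import numpy as np

    text_vec = getTextEmbedding("a photo of a cat")

    with open("cat.jpg", "rb") as f:
        image_vec = getImageEmbedding(f.read())

    # Cosine similarity between the text and image embeddings
    similarity = np.dot(text_vec, image_vec) / (
        np.linalg.norm(text_vec) * np.linalg.norm(image_vec)
    )
    print(f"Text/image cosine similarity: {similarity:.4f}")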