from PIL import Image
import io
from transformers import CLIPProcessor, CLIPModel
import torch

# Load CLIP model and processor
model_name = "openai/clip-vit-base-patch32"
loaded_model = CLIPModel.from_pretrained(model_name)
loaded_processor = CLIPProcessor.from_pretrained(model_name)

def getTextEmbedding(text):
    # Tokenize and preprocess the text
    inputs_text = loaded_processor(text=[text], return_tensors="pt", padding=True)

    # Forward pass through the model (no gradients needed for inference)
    with torch.no_grad():
        # Get the text features
        text_features = loaded_model.get_text_features(
            input_ids=inputs_text.input_ids,
            attention_mask=inputs_text.attention_mask,
        )

    # Convert the tensor to a numpy array
    text_embedding = text_features.squeeze().numpy()

    return text_embedding

def getImageEmbedding(binary_image_data):
    # Load and preprocess the image
    image = Image.open(io.BytesIO(binary_image_data))
    inputs = loaded_processor(images=image, return_tensors="pt", padding=True)

    # Forward pass through the model
    with torch.no_grad():
        # Get the image features
        image_features = loaded_model.get_image_features(pixel_values=inputs.pixel_values)

    # Convert the tensor to a numpy array
    image_embedding = image_features.squeeze().numpy()

    return image_embedding
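
# A minimal usage sketch: it computes one text and one image embedding and
# compares them with cosine similarity via numpy, a common way to use CLIP
# embeddings for retrieval. The image path "example.jpg" is a hypothetical
# placeholder; point it at a real file before running.
if __name__ == "__main__":
    import numpy as np

    text_vec = getTextEmbedding("a photo of a cat")

    with open("example.jpg", "rb") as f:  # hypothetical example image path
        image_vec = getImageEmbedding(f.read())

    # Cosine similarity between the two embeddings
    similarity = np.dot(text_vec, image_vec) / (
        np.linalg.norm(text_vec) * np.linalg.norm(image_vec)
    )

    print(f"Embedding shapes: {text_vec.shape}, {image_vec.shape}")
    print(f"Cosine similarity: {similarity:.4f}")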