File size: 1,091 Bytes
0d38ded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from model import CLIPChemistryModel, TextEncoderHead, ImageEncoderHead
from transformers import ViTModel, AutoModelForMaskedLM, AutoTokenizer, ViTImageProcessor

from io import BytesIO
import base64

from PIL import Image

def bytes_to_str(bytes_data):
    """Return the standard base64 encoding of *bytes_data* as a text string.

    The base64 output is itself a bytes object; it is decoded as UTF-8 so the
    result can be stored where only text is accepted (it is used below for
    record metadata).
    """
    encoded = base64.b64encode(bytes_data)
    return str(encoded, 'utf-8')

def str_to_bytes(str_data):
    """Decode base64 text back into the raw bytes it represents.

    Inverse of ``bytes_to_str``: *str_data* is handed directly to
    ``base64.b64decode`` with its default options.
    """
    raw_bytes = base64.b64decode(str_data)
    return raw_bytes

def push_embeddings_to_pine_cone(index, embeddings, df, mode, length):
    """Build one record per row and upsert them all into *index*.

    For each ``i`` in ``range(length)`` a record is created with:

    * ``"id"``: ``mode`` followed by ``i`` (e.g. ``"text0"``),
    * ``"values"``: ``embeddings[i]``, passed through unchanged,
    * ``"metadata"``: ``{mode: <payload>}`` where the payload is
      ``df[mode].iloc[i]`` for text, or the base64 text of
      ``df[mode].iloc[i]['bytes']`` for images.

    All records are sent in a single ``index.upsert`` call to the namespace
    ``"space-" + mode``.

    Args:
        index: Object exposing ``upsert(vectors=..., namespace=...)``.
        embeddings: Indexable collection with at least ``length`` entries.
            NOTE(review): entries are forwarded as-is; confirm the index
            client accepts their type (e.g. plain lists of floats).
        df: Table-like object where ``df[mode].iloc[i]`` yields row ``i``.
        mode: Either ``'text'`` or ``'image'``.
        length: Number of leading rows to push.

    Raises:
        ValueError: If ``mode`` is neither ``'text'`` nor ``'image'``. This is
            checked before any work is done, so an invalid mode is rejected
            even when ``length`` is 0 (previously that case silently upserted
            into a bogus namespace).
    """
    # Validate once, up front, instead of on every loop iteration.
    if mode not in ('text', 'image'):
        raise ValueError("mode must be either 'text' or 'image'")

    is_image = mode == 'image'
    key = str(mode)

    records = []
    for i in range(length):
        vector = embeddings[i]
        payload = df[mode].iloc[i]
        if is_image:
            # Image cells hold raw bytes under 'bytes'; metadata must be text.
            payload = bytes_to_str(payload['bytes'])
        records.append({
            "id": key + str(i),
            "values": vector,
            "metadata": {key: payload},
        })

    index.upsert(
        vectors=records,
        namespace="space-" + mode
    )