File size: 1,464 Bytes
42899a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
from ops_mm_embedding_v1 import OpsMMEmbeddingV1, fetch_image
# Example: compute text, image, fused (text + image) and multi-image
# embeddings with OpsMMEmbeddingV1 and print the pairwise similarity scores.

# Load the embedding model onto the GPU.
# NOTE(review): "flash_attention_2" presumably requires the flash-attn
# package to be installed -- confirm before running on a fresh machine.
model = OpsMMEmbeddingV1(
    "OpenSearch-AI/Ops-MM-embedding-v1-2B",
    device="cuda",
    attn_implementation="flash_attention_2",
)

# Instruction passed to the fused-embedding call below.
t2i_prompt = "Find an image that matches the given text."

# One text per image in `images`.
# NOTE(review): texts and images are presumably paired by position in the
# fused call -- verify against get_fused_embeddings.
texts = [
    "The Tesla Cybertruck is a battery electric pickup truck built by Tesla, Inc. since 2023.",
    "Alibaba office.",
    "Alibaba office.",
]

images = [
    "https://upload.wikimedia.org/wikipedia/commons/e/e9/Tesla_Cybertruck_damaged_window.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/e/e0/TaobaoCity_Alibaba_Xixi_Park.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Alibaba_Binjiang_Park.jpg/1024px-Alibaba_Binjiang_Park.jpg",
]
# Replace each URL with whatever fetch_image returns for it (network access
# is presumably required here).
images = [fetch_image(image) for image in images]

# Text and image embedding
text_embeddings = model.get_text_embeddings(texts)
image_embeddings = model.get_image_embeddings(images)
# Matrix product of the text embeddings with the transposed image
# embeddings: one score per (text, image) pair.
print('Text and image embeddings', (text_embeddings @ image_embeddings.T).tolist())

# Fused Embedding
text_with_image_embeddings = model.get_fused_embeddings(texts=texts, images=images, instruction=t2i_prompt)
# Fix: the original re-printed the text-vs-image scores from the section
# above and never used the fused embeddings. Score the fused embeddings
# against the image embeddings instead, under a distinct label.
print('Fused embeddings', (text_with_image_embeddings @ image_embeddings.T).tolist())

# Multi-image embeddings
# Each inner list is one input made of one or more images.
multi_images = [
    [images[0]],
    [images[1], images[2]],
]
multi_image_embeddings = model.get_image_embeddings(multi_images)
# Self-similarity between the multi-image inputs.
print('Multi-image embeddings', (multi_image_embeddings @ multi_image_embeddings.T).tolist())
|