|
from ops_mm_embedding_v1 import OpsMMEmbeddingV1, fetch_image |
|
|
|
|
|
# Load the multimodal embedding model onto the GPU.
# flash_attention_2 speeds up attention; requires a compatible CUDA setup.
model = OpsMMEmbeddingV1(
    "OpenSearch-AI/Ops-MM-embedding-v1-7B",
    device="cuda",
    attn_implementation="flash_attention_2",
)
|
|
|
# Instruction used for text-to-image retrieval style queries.
t2i_prompt = "Find an image that matches the given text."

# Three captions; the two "Alibaba office." entries deliberately pair with
# the two different Alibaba campus photos below.
texts = [
    "The Tesla Cybertruck is a battery electric pickup truck built by Tesla, Inc. since 2023.",
    "Alibaba office.",
    "Alibaba office.",
]

# Source URLs for the demo images (Wikimedia Commons).
image_urls = [
    "https://upload.wikimedia.org/wikipedia/commons/e/e9/Tesla_Cybertruck_damaged_window.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/e/e0/TaobaoCity_Alibaba_Xixi_Park.jpg",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/b/b0/Alibaba_Binjiang_Park.jpg/1024px-Alibaba_Binjiang_Park.jpg",
]

# Download/decode each URL into the image object the model expects.
images = list(map(fetch_image, image_urls))
|
|
|
|
|
# Embed the captions and the images separately, then compare them with a
# dot-product similarity matrix (rows: texts, columns: images).
text_embeddings = model.get_text_embeddings(texts)
image_embeddings = model.get_image_embeddings(images)
similarity = text_embeddings @ image_embeddings.T
print('Text and image embeddings', similarity.tolist())
|
|
|
|
|
# Fuse each caption with its image under the retrieval instruction.
text_with_image_embeddings = model.get_fused_embeddings(texts=texts, images=images, instruction=t2i_prompt)
# Bug fix: the fused embeddings were computed but never used — the original
# re-printed the text-vs-image matrix from above under the same label.
# Print the fused-embedding self-similarity instead. NOTE(review): if the
# intent was fused-vs-image similarity, swap the second operand accordingly.
print('Fused text+image embeddings', (text_with_image_embeddings @ text_with_image_embeddings.T).tolist())
|
|
|
|
|
# Group the fetched images into per-document sets: one single-image document
# and one two-image document. Slicing keeps the grouping identical to
# [[images[0]], [images[1], images[2]]].
multi_images = [
    images[:1],
    images[1:3],
]
# Embed each image group as one vector and print the pairwise similarities.
multi_image_embeddings = model.get_image_embeddings(multi_images)
print('Multi-image embeddings', (multi_image_embeddings @ multi_image_embeddings.T).tolist())
|
|