File size: 989 Bytes
75f07f8 aafaf12 75f07f8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
import asyncio
from infinity_emb import AsyncEngineArray, EngineArgs, AsyncEmbeddingEngine
import numpy as np
# Single-entry engine array: MiniCPM-Embedding-Light on the torch backend,
# float16 weights, mean pooling.
array = AsyncEngineArray.from_args(
    [
        EngineArgs(
            model_name_or_path="openbmb/MiniCPM-Embedding-Light",
            engine="torch",
            dtype="float16",
            bettertransformer=False,
            pooling_method="mean",
            trust_remote_code=True,
        ),
    ]
)

INSTRUCTION = "Query:"

raw_queries = ["中国的首都是哪里?"]  # "What is the capital of China?"
passages = ["beijing", "shanghai"]  # the Chinese names are "北京", "上海"

# Only the queries get the instruction prefix; passages are embedded verbatim.
queries = [f"{INSTRUCTION} {text}" for text in raw_queries]
async def embed_text(engine: AsyncEmbeddingEngine, sentences):
    """Embed ``sentences`` with ``engine`` and return only the embeddings.

    The engine is used as an async context manager around a single
    ``embed`` call, so it is entered before embedding and exited before
    this coroutine returns.

    Args:
        engine: Object supporting ``async with`` and an awaitable
            ``embed(sentences=...)`` that yields a two-element result.
        sentences: Texts to embed; forwarded unchanged to ``engine.embed``.

    Returns:
        The first element of the ``engine.embed`` result (the embeddings).
        The second element (bound as ``usage`` in the original) is discarded.
    """
    async with engine:
        # Unpack both elements so a result of the wrong shape still fails
        # loudly; the second one is intentionally unused.
        embeddings, _ = await engine.embed(sentences=sentences)
    return embeddings
# Each side is embedded in its own asyncio.run call against the first
# engine of the array.
queries_embedding = asyncio.run(embed_text(array[0], queries))
passages_embedding = asyncio.run(embed_text(array[0], passages))

# Rows are queries, columns are passages: one dot product per pair.
query_matrix = np.array(queries_embedding)
passage_matrix = np.array(passages_embedding)
scores = query_matrix @ passage_matrix.T

print(scores.tolist())  # [[0.40356746315956116, 0.36183443665504456]]