"""Example: score candidate documents against a query with MiniCPM-Reranker-Light.

Builds an infinity_emb engine for ``OpenBMB/MiniCPM-Reranker-Light``, reranks
two candidate documents for an instruction-prefixed query, and prints each
result next to the document it refers to.
"""
import asyncio

from infinity_emb import AsyncEngineArray, EngineArgs, AsyncEmbeddingEngine

query = "中国的首都是哪里?"  # "What is the capital of China?"
docs = ["beijing", "shanghai"]  # Chinese equivalents: "北京", "上海"

# The instruction is prepended to the query text before it is sent to the model.
INSTRUCTION = "Query:"
query = f"{INSTRUCTION} {query}"

array = AsyncEngineArray.from_args(
    [
        EngineArgs(
            model_name_or_path="OpenBMB/MiniCPM-Reranker-Light",
            engine="torch",
            dtype="float16",
            bettertransformer=False,
            trust_remote_code=True,
            model_warmup=False,
        )
    ]
)


async def rerank(engine: AsyncEmbeddingEngine) -> None:
    """Rerank the module-level ``docs`` against ``query`` and print the result.

    Args:
        engine: The engine to run; it is entered as an async context manager
            for the duration of the rerank call.
    """
    async with engine:
        ranking, usage = await engine.rerank(query=query, docs=docs)

    # Pair every result with the document it refers to through its own
    # ``index`` field instead of by position, so the pairing stays correct
    # even when the returned ranking is not in input order.
    print([(result, docs[result.index]) for result in ranking])


asyncio.run(rerank(array[0]))
# [(RerankReturnType(relevance_score=0.017917344, document='beijing', index=0), 'beijing'), (RerankReturnType(relevance_score=0.00024729347, document='shanghai', index=1), 'shanghai')]