|
from typing import Any, List |
|
|
|
from openai import AsyncAzureOpenAI, AzureOpenAI |
|
|
|
from ...utils.registry import registry |
|
from .base import EncoderBase |
|
|
|
|
|
@registry.register_encoder()
class AzureTextEmbeddingV3(EncoderBase):
    """Text encoder backed by an Azure OpenAI embeddings deployment.

    A synchronous and an asynchronous Azure OpenAI client are created at
    construction time; ``_infer`` and ``_ainfer`` use them to turn a batch
    of strings into one embedding vector per string.
    """

    # Value sent as ``model`` on every embeddings request.
    model_id: str = "text-embedding-3-large"
    # API key handed to both Azure OpenAI clients.
    api_key: str
    # Declared embedding size.
    # NOTE(review): this value is not forwarded to the embeddings API in this
    # class; presumably it is consumed by EncoderBase or callers - confirm.
    dim: int = 3072
    # Azure OpenAI API version string handed to both clients.
    api_version: str

    class Config:
        """Configuration for this pydantic object."""

        # Presumably cleared so that the ``model_id`` field does not clash
        # with pydantic's protected ``model_`` prefix - TODO confirm.
        protected_namespaces = ()
        # Undeclared attributes are permitted; ``__init__`` relies on this
        # when it attaches ``client`` and ``aclient`` to the instance.
        extra = "allow"

    def __init__(self, /, **data: Any) -> None:
        """Validate the fields and create the sync and async API clients.

        Args:
            **data: Field values forwarded unchanged to the parent
                constructor.
        """
        super().__init__(**data)
        # NOTE(review): ``endpoint`` is not declared in this class; it is
        # assumed to come from EncoderBase or to be supplied as an extra
        # field - confirm against the base class.
        client_options = {
            "azure_endpoint": self.endpoint,
            "api_key": self.api_key,
            "api_version": self.api_version,
        }
        self.client = AzureOpenAI(**client_options)
        self.aclient = AsyncAzureOpenAI(**client_options)

    def _infer(self, data: List[str], **kwargs) -> List[List[float]]:
        """Embed ``data`` with the synchronous client.

        Args:
            data: Texts to embed.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            One embedding per entry of the API response, in response order.
            An empty list is returned for empty input.
        """
        # The embeddings endpoint rejects an empty input array, so avoid the
        # round trip (and the resulting request error) altogether.
        if not data:
            return []
        res = self.client.embeddings.create(input=data, model=self.model_id)
        return [item.embedding for item in res.data]

    async def _ainfer(self, data: List[str], **kwargs) -> List[List[float]]:
        """Embed ``data`` with the asynchronous client.

        Args:
            data: Texts to embed.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            One embedding per entry of the API response, in response order.
            An empty list is returned for empty input.
        """
        # Same empty-input guard as the synchronous path.
        if not data:
            return []
        res = await self.aclient.embeddings.create(input=data, model=self.model_id)
        return [item.embedding for item in res.data]
|
|