Spaces:
Sleeping
Sleeping
File size: 1,726 Bytes
1b7e88c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import io
from typing import Any, Optional
from openai import NOT_GIVEN, AsyncOpenAI, OpenAI
from pydub import AudioSegment
from ...base import BotBase
class STT(BotBase):
    """Speech-to-text bot backed by the OpenAI transcription API.

    Converts a pydub ``AudioSegment`` into an in-memory MP3 and submits it
    to the ``audio/transcriptions`` endpoint, either synchronously
    (``infer``) or asynchronously (``ainfer``).
    """

    model_id: str = "whisper-1"             # transcription model name
    endpoint: Optional[str] = None          # custom base URL; None -> SDK default
    api_key: str                            # API credential (required)
    language: Optional[str] = None          # ISO language hint; None -> auto-detect
    response_format: str = "verbose_json"   # shape of the transcription payload

    class Config:
        """Configuration for this pydantic object."""

        extra = "allow"            # permit non-field attrs (client/aclient below)
        protected_namespaces = ()  # allow field names starting with "model_"

    def __init__(self, /, **data: Any) -> None:
        """Validate the pydantic fields, then build sync and async clients."""
        super().__init__(**data)
        self.client = OpenAI(base_url=self.endpoint, api_key=self.api_key)
        self.aclient = AsyncOpenAI(base_url=self.endpoint, api_key=self.api_key)

    def _as2bytes(self, audio: AudioSegment) -> io.BytesIO:
        """Export *audio* to a rewound in-memory MP3 buffer ready for upload.

        The buffer is given a ``.name`` because the OpenAI SDK infers the
        upload's file type from the file-like object's name attribute.
        """
        buffer = io.BytesIO()
        audio.export(buffer, format="mp3")
        buffer.seek(0)
        buffer.name = "buffer.mp3"
        return buffer

    def _language_arg(self) -> Any:
        """Return the configured language hint, or NOT_GIVEN for auto-detection."""
        # Single source of truth shared by infer/ainfer so the two paths
        # cannot drift apart.
        return NOT_GIVEN if self.language is None else self.language

    def infer(self, audio: AudioSegment) -> dict:
        """Transcribe *audio* synchronously; return the API response as a dict."""
        trans = self.client.audio.transcriptions.create(
            model=self.model_id,
            file=self._as2bytes(audio),
            response_format=self.response_format,
            language=self._language_arg(),
        )
        return trans.to_dict()

    async def ainfer(self, audio: AudioSegment) -> dict:
        """Transcribe *audio* asynchronously; return the API response as a dict."""
        trans = await self.aclient.audio.transcriptions.create(
            model=self.model_id,
            file=self._as2bytes(audio),
            response_format=self.response_format,
            language=self._language_arg(),
        )
        return trans.to_dict()
|