File size: 4,186 Bytes
e3278e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
"""
Translates from OpenAI's `/v1/audio/transcriptions` to Deepgram's `/v1/listen`
"""
from typing import List, Optional, Union
from httpx import Headers, Response
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
AllMessageValues,
OpenAIAudioTranscriptionOptionalParams,
)
from litellm.types.utils import TranscriptionResponse
from ...base_llm.audio_transcription.transformation import (
BaseAudioTranscriptionConfig,
LiteLLMLoggingObj,
)
from ..common_utils import DeepgramException
class DeepgramAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
def get_supported_openai_params(
self, model: str
) -> List[OpenAIAudioTranscriptionOptionalParams]:
return ["language"]
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
supported_params = self.get_supported_openai_params(model)
for k, v in non_default_params.items():
if k in supported_params:
optional_params[k] = v
return optional_params
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, Headers]
) -> BaseLLMException:
return DeepgramException(
message=error_message, status_code=status_code, headers=headers
)
def transform_audio_transcription_response(
self,
model: str,
raw_response: Response,
model_response: TranscriptionResponse,
logging_obj: LiteLLMLoggingObj,
request_data: dict,
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
) -> TranscriptionResponse:
"""
Transforms the raw response from Deepgram to the TranscriptionResponse format
"""
try:
response_json = raw_response.json()
# Get the first alternative from the first channel
first_channel = response_json["results"]["channels"][0]
first_alternative = first_channel["alternatives"][0]
# Extract the full transcript
text = first_alternative["transcript"]
# Create TranscriptionResponse object
response = TranscriptionResponse(text=text)
# Add additional metadata matching OpenAI format
response["task"] = "transcribe"
response["language"] = (
"english" # Deepgram auto-detects but doesn't return language
)
response["duration"] = response_json["metadata"]["duration"]
# Transform words to match OpenAI format
if "words" in first_alternative:
response["words"] = [
{"word": word["word"], "start": word["start"], "end": word["end"]}
for word in first_alternative["words"]
]
# Store full response in hidden params
response._hidden_params = response_json
return response
except Exception as e:
raise ValueError(
f"Error transforming Deepgram response: {str(e)}\nResponse: {raw_response.text}"
)
def get_complete_url(
self,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,
) -> str:
if api_base is None:
api_base = (
get_secret_str("DEEPGRAM_API_BASE") or "https://api.deepgram.com/v1"
)
api_base = api_base.rstrip("/") # Remove trailing slash if present
return f"{api_base}/listen?model={model}"
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
api_key = api_key or get_secret_str("DEEPGRAM_API_KEY")
return {
"Authorization": f"Token {api_key}",
}
|