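"""Gradio playground UI for faster-whisper-server.

Builds a small web demo that talks to the server's OpenAI-compatible HTTP API:
audio transcription/translation (with optional SSE streaming) and, on x86_64
machines, Piper-based speech generation.
"""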
from collections.abc import AsyncGenerator
from pathlib import Path
import platform

import gradio as gr
import httpx
from httpx_sse import aconnect_sse
from openai import AsyncOpenAI

from faster_whisper_server.config import Config, Task
from faster_whisper_server.hf_utils import PiperModel

TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions"
TRANSLATION_ENDPOINT = "/v1/audio/translations"
# Generous timeout: transcribing long audio files can take a while.
TIMEOUT_SECONDS = 180
TIMEOUT = httpx.Timeout(timeout=TIMEOUT_SECONDS)


# NOTE: `gr.Request` seems to be passed in as the last positional (not keyword) argument
def base_url_from_gradio_req(request: gr.Request) -> str:
    # NOTE: `request.request.url` seems to always have a path of "/gradio_api/queue/join"
    assert request.request is not None
    return f"{request.request.url.scheme}://{request.request.url.netloc}"


def http_client_from_gradio_req(request: gr.Request, config: Config) -> httpx.AsyncClient:
    base_url = base_url_from_gradio_req(request)
    return httpx.AsyncClient(
        base_url=base_url,
        timeout=TIMEOUT,
        headers={"Authorization": f"Bearer {config.api_key}"} if config.api_key else None,
    )


def openai_client_from_gradio_req(request: gr.Request, config: Config) -> AsyncOpenAI:
    base_url = base_url_from_gradio_req(request)
    # The OpenAI client requires a non-empty API key, so pass a placeholder when
    # the server has no auth configured.
    return AsyncOpenAI(base_url=f"{base_url}/v1", api_key=config.api_key if config.api_key else "cant-be-empty")


def create_gradio_demo(config: Config) -> gr.Blocks:  # noqa: C901, PLR0915
    async def whisper_handler(
        file_path: str, model: str, task: Task, temperature: float, stream: bool, request: gr.Request
    ) -> AsyncGenerator[str, None]:
        http_client = http_client_from_gradio_req(request, config)
        if task == Task.TRANSCRIBE:
            endpoint = TRANSCRIPTION_ENDPOINT
        elif task == Task.TRANSLATE:
            endpoint = TRANSLATION_ENDPOINT

        if stream:
            # The server streams incremental transcription chunks over SSE;
            # accumulate them so the textbox always shows the transcript so far.
            previous_transcription = ""
            async for transcription in streaming_audio_task(http_client, file_path, endpoint, temperature, model):
                previous_transcription += transcription
                yield previous_transcription
        else:
            yield await audio_task(http_client, file_path, endpoint, temperature, model)

    async def audio_task(
        http_client: httpx.AsyncClient, file_path: str, endpoint: str, temperature: float, model: str
    ) -> str:
        with Path(file_path).open("rb") as file:  # noqa: ASYNC230
            response = await http_client.post(
                endpoint,
                files={"file": file},
                data={
                    "model": model,
                    "response_format": "text",
                    "temperature": temperature,
                },
            )
        response.raise_for_status()
        return response.text

    async def streaming_audio_task(
        http_client: httpx.AsyncClient, file_path: str, endpoint: str, temperature: float, model: str
    ) -> AsyncGenerator[str, None]:
        with Path(file_path).open("rb") as file:  # noqa: ASYNC230
            kwargs = {
                "files": {"file": file},
                "data": {
                    "response_format": "text",
                    "temperature": temperature,
                    "model": model,
                    "stream": True,
                },
            }
            async with aconnect_sse(http_client, "POST", endpoint, **kwargs) as event_source:
                async for event in event_source.aiter_sse():
                    yield event.data

    async def update_whisper_model_dropdown(request: gr.Request) -> gr.Dropdown:
        openai_client = openai_client_from_gradio_req(request, config)
        models = (await openai_client.models.list()).data
        model_names: list[str] = [model.id for model in models]
        assert config.whisper.model in model_names
        # Float the recommended ("Systran"-prefixed) models to the top of the dropdown.
        recommended_models = {model for model in model_names if model.startswith("Systran")}
        other_models = [model for model in model_names if model not in recommended_models]
        model_names = list(recommended_models) + other_models
        return gr.Dropdown(
            choices=model_names,
            label="Model",
            value=config.whisper.model,
        )
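
    # NOTE: `DEFAULT_VOICE` is imported lazily in the "Speech Generation" tab
    # below; this handler is only wired up (via `demo.load`) after that import
    # has run on x86_64 machines.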
    async def update_piper_voices_dropdown(request: gr.Request) -> gr.Dropdown:
        http_client = http_client_from_gradio_req(request, config)
        res = (await http_client.get("/v1/audio/speech/voices")).raise_for_status()
        piper_models = [PiperModel.model_validate(x) for x in res.json()]
        return gr.Dropdown(choices=[model.voice for model in piper_models], label="Voice", value=DEFAULT_VOICE)

    async def handle_audio_speech(
        text: str, voice: str, response_format: str, speed: float, sample_rate: int | None, request: gr.Request
    ) -> Path:
        openai_client = openai_client_from_gradio_req(request, config)
        res = await openai_client.audio.speech.create(
            input=text,
            model="piper",
            voice=voice,  # pyright: ignore[reportArgumentType]
            response_format=response_format,  # pyright: ignore[reportArgumentType]
            speed=speed,
            extra_body={"sample_rate": sample_rate},
        )
        audio_bytes = res.response.read()
        file_path = Path(f"audio.{response_format}")
        with file_path.open("wb") as file:  # noqa: ASYNC230
            file.write(audio_bytes)
        return file_path

    with gr.Blocks(title="faster-whisper-server Playground") as demo:
        gr.Markdown(
            "### Consider supporting the project by starring the [repository on GitHub](https://github.com/fedirz/faster-whisper-server)."
        )
        with gr.Tab(label="Transcribe/Translate"):
            audio = gr.Audio(type="filepath")
            model_dropdown = gr.Dropdown(
                choices=[config.whisper.model],
                label="Model",
                value=config.whisper.model,
            )
            task_dropdown = gr.Dropdown(
                choices=[task.value for task in Task],
                label="Task",
                value=Task.TRANSCRIBE,
            )
            temperature_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label="Temperature", value=0.0)
            stream_checkbox = gr.Checkbox(label="Stream", value=True)
            button = gr.Button("Generate")
            output = gr.Textbox()

            # NOTE: the inputs order must match the `whisper_handler` signature
            button.click(
                whisper_handler, [audio, model_dropdown, task_dropdown, temperature_slider, stream_checkbox], output
            )
with gr.Tab(label="Speech Generation"):
if platform.machine() != "x86_64":
from faster_whisper_server.routers.speech import (
DEFAULT_VOICE,
MAX_SAMPLE_RATE,
MIN_SAMPLE_RATE,
SUPPORTED_RESPONSE_FORMATS,
)
text = gr.Textbox(label="Input Text")
voice_dropdown = gr.Dropdown(
choices=["en_US-amy-medium"],
label="Voice",
value="en_US-amy-medium",
info="""
The last part of the voice name is the quality (x_low, low, medium, high).
Each quality has a different default sample rate:
- x_low: 16000 Hz
- low: 16000 Hz
- medium: 22050 Hz
- high: 22050 Hz
""",
)
response_fromat_dropdown = gr.Dropdown(
choices=SUPPORTED_RESPONSE_FORMATS,
label="Response Format",
value="wav",
)
speed_slider = gr.Slider(minimum=0.25, maximum=4.0, step=0.05, label="Speed", value=1.0)
sample_rate_slider = gr.Number(
minimum=MIN_SAMPLE_RATE,
maximum=MAX_SAMPLE_RATE,
label="Desired Sample Rate",
info="""
Setting this will resample the generated audio to the desired sample rate.
You may want to set this if you are going to use voices of different qualities but want to keep the same sample rate.
Default: None (No resampling)
""",
value=lambda: None,
)
button = gr.Button("Generate Speech")
output = gr.Audio(type="filepath")
button.click(
handle_audio_speech,
[text, voice_dropdown, response_fromat_dropdown, speed_slider, sample_rate_slider],
output,
)
demo.load(update_piper_voices_dropdown, inputs=None, outputs=voice_dropdown)
else:
gr.Textbox("Speech generation is only supported on x86_64 machines.")
demo.load(update_whisper_model_dropdown, inputs=None, outputs=model_dropdown)
return demo
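

# A minimal sketch of launching the playground standalone; an assumption, since
# in the actual server the demo is typically mounted onto the app instead, and
# `Config()` may require environment-specific settings.
if __name__ == "__main__":
    demo = create_gradio_demo(Config())
    demo.launch()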