sachin
committed on
Commit
·
7eab053
1
Parent(s):
7ee697f
some changes
Browse files
- src/server/main.py +14 -13
src/server/main.py
CHANGED
@@ -3,7 +3,6 @@ import io
|
|
3 |
import os
|
4 |
from time import time
|
5 |
from typing import List
|
6 |
-
import tempfile
|
7 |
import uvicorn
|
8 |
from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Body, Form, APIRouter
|
9 |
from fastapi.middleware.cors import CORSMiddleware
|
@@ -341,22 +340,24 @@ async def synthesize_speech(tts_manager: TTSManager, text: str, ref_audio_name:
|
|
341 |
logger.info(f"Synthesizing speech for text: {text[:50]}... with ref_audio: {ref_audio_name}")
|
342 |
sample_rate, audio_data = load_audio_from_url(ref_audio_url)
|
343 |
|
344 |
-
# Use
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
audio = await asyncio.to_thread(tts_manager.synthesize, text, temp_audio.name, ref_text)
|
349 |
|
350 |
-
#
|
351 |
-
|
|
|
|
|
|
|
352 |
|
353 |
if audio.dtype == np.int16:
|
354 |
audio = audio.astype(np.float32) / 32768.0
|
355 |
-
|
356 |
-
sf.write(
|
357 |
-
|
358 |
logger.info("Speech synthesis completed")
|
359 |
-
return
|
360 |
|
361 |
# Supported languages
|
362 |
SUPPORTED_LANGUAGES = {
|
@@ -781,7 +782,7 @@ async def chat_v2(
|
|
781 |
|
782 |
app.include_router(llm_router)
|
783 |
|
784 |
-
# Improved Endpoints with GPU Optimization
|
785 |
@app.post("/audio/speech", response_class=StreamingResponse)
|
786 |
async def synthesize_kannada(request: KannadaSynthesizeRequest):
|
787 |
if not tts_manager.model:
|
|
|
3 |
import os
|
4 |
from time import time
|
5 |
from typing import List
|
|
|
6 |
import uvicorn
|
7 |
from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Body, Form, APIRouter
|
8 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
340 |
logger.info(f"Synthesizing speech for text: {text[:50]}... with ref_audio: {ref_audio_name}")
|
341 |
sample_rate, audio_data = load_audio_from_url(ref_audio_url)
|
342 |
|
343 |
+
# Use in-memory buffer instead of temporary file
|
344 |
+
ref_audio_buffer = io.BytesIO()
|
345 |
+
sf.write(ref_audio_buffer, audio_data, sample_rate, format='WAV')
|
346 |
+
ref_audio_buffer.seek(0)
|
|
|
347 |
|
348 |
+
# Run synthesis with in-memory buffer
|
349 |
+
audio = await asyncio.to_thread(tts_manager.synthesize, text, ref_audio_buffer, ref_text)
|
350 |
+
|
351 |
+
# Ensure buffer is closed
|
352 |
+
ref_audio_buffer.close()
|
353 |
|
354 |
if audio.dtype == np.int16:
|
355 |
audio = audio.astype(np.float32) / 32768.0
|
356 |
+
output_buffer = io.BytesIO()
|
357 |
+
sf.write(output_buffer, audio, 24000, format='WAV')
|
358 |
+
output_buffer.seek(0)
|
359 |
logger.info("Speech synthesis completed")
|
360 |
+
return output_buffer
|
361 |
|
362 |
# Supported languages
|
363 |
SUPPORTED_LANGUAGES = {
|
|
|
782 |
|
783 |
app.include_router(llm_router)
|
784 |
|
785 |
+
# Improved Endpoints with GPU Optimization and In-Memory Audio
|
786 |
@app.post("/audio/speech", response_class=StreamingResponse)
|
787 |
async def synthesize_kannada(request: KannadaSynthesizeRequest):
|
788 |
if not tts_manager.model:
|