Spaces:

slabstech
/

dhwani-internal-api-server

Paused

App Files Community

sachin committed 13 days ago

Commit

7eab053

1 Parent(s): 7ee697f

some changes

Browse files

Files changed (1) hide show

src/server/main.py +14 -13

src/server/main.py CHANGED Viewed

@@ -3,7 +3,6 @@ import io
 import os
 from time import time
 from typing import List
-import tempfile
 import uvicorn
 from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Body, Form, APIRouter
 from fastapi.middleware.cors import CORSMiddleware
@@ -341,22 +340,24 @@ async def synthesize_speech(tts_manager: TTSManager, text: str, ref_audio_name:
     logger.info(f"Synthesizing speech for text: {text[:50]}... with ref_audio: {ref_audio_name}")
     sample_rate, audio_data = load_audio_from_url(ref_audio_url)
-    # Use synchronous context manager and run synthesis in a thread
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
-        sf.write(temp_audio.name, audio_data, samplerate=sample_rate, format='WAV')
-        temp_audio.flush()
-        audio = await asyncio.to_thread(tts_manager.synthesize, text, temp_audio.name, ref_text)
-    # Clean up temporary file
-    os.unlink(temp_audio.name)
     if audio.dtype == np.int16:
         audio = audio.astype(np.float32) / 32768.0
-    buffer = io.BytesIO()
-    sf.write(buffer, audio, 24000, format='WAV')
-    buffer.seek(0)
     logger.info("Speech synthesis completed")
-    return buffer
 # Supported languages
 SUPPORTED_LANGUAGES = {
@@ -781,7 +782,7 @@ async def chat_v2(
 app.include_router(llm_router)
-# Improved Endpoints with GPU Optimization
 @app.post("/audio/speech", response_class=StreamingResponse)
 async def synthesize_kannada(request: KannadaSynthesizeRequest):
     if not tts_manager.model:

 import os
 from time import time
 from typing import List
 import uvicorn
 from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Body, Form, APIRouter
 from fastapi.middleware.cors import CORSMiddleware
     logger.info(f"Synthesizing speech for text: {text[:50]}... with ref_audio: {ref_audio_name}")
     sample_rate, audio_data = load_audio_from_url(ref_audio_url)
+    # Use in-memory buffer instead of temporary file
+    ref_audio_buffer = io.BytesIO()
+    sf.write(ref_audio_buffer, audio_data, sample_rate, format='WAV')
+    ref_audio_buffer.seek(0)
+    # Run synthesis with in-memory buffer
+    audio = await asyncio.to_thread(tts_manager.synthesize, text, ref_audio_buffer, ref_text)
+    # Ensure buffer is closed
+    ref_audio_buffer.close()
     if audio.dtype == np.int16:
         audio = audio.astype(np.float32) / 32768.0
+    output_buffer = io.BytesIO()
+    sf.write(output_buffer, audio, 24000, format='WAV')
+    output_buffer.seek(0)
     logger.info("Speech synthesis completed")
+    return output_buffer
 # Supported languages
 SUPPORTED_LANGUAGES = {
 app.include_router(llm_router)
+# Improved Endpoints with GPU Optimization and In-Memory Audio
 @app.post("/audio/speech", response_class=StreamingResponse)
 async def synthesize_kannada(request: KannadaSynthesizeRequest):
     if not tts_manager.model: