sachin committed on
Commit
7eab053
·
1 Parent(s): 7ee697f

some changes

Browse files
Files changed (1) hide show
  1. src/server/main.py +14 -13
src/server/main.py CHANGED
@@ -3,7 +3,6 @@ import io
3
  import os
4
  from time import time
5
  from typing import List
6
- import tempfile
7
  import uvicorn
8
  from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Body, Form, APIRouter
9
  from fastapi.middleware.cors import CORSMiddleware
@@ -341,22 +340,24 @@ async def synthesize_speech(tts_manager: TTSManager, text: str, ref_audio_name:
341
  logger.info(f"Synthesizing speech for text: {text[:50]}... with ref_audio: {ref_audio_name}")
342
  sample_rate, audio_data = load_audio_from_url(ref_audio_url)
343
 
344
- # Use synchronous context manager and run synthesis in a thread
345
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
346
- sf.write(temp_audio.name, audio_data, samplerate=sample_rate, format='WAV')
347
- temp_audio.flush()
348
- audio = await asyncio.to_thread(tts_manager.synthesize, text, temp_audio.name, ref_text)
349
 
350
- # Clean up temporary file
351
- os.unlink(temp_audio.name)
 
 
 
352
 
353
  if audio.dtype == np.int16:
354
  audio = audio.astype(np.float32) / 32768.0
355
- buffer = io.BytesIO()
356
- sf.write(buffer, audio, 24000, format='WAV')
357
- buffer.seek(0)
358
  logger.info("Speech synthesis completed")
359
- return buffer
360
 
361
  # Supported languages
362
  SUPPORTED_LANGUAGES = {
@@ -781,7 +782,7 @@ async def chat_v2(
781
 
782
  app.include_router(llm_router)
783
 
784
- # Improved Endpoints with GPU Optimization
785
  @app.post("/audio/speech", response_class=StreamingResponse)
786
  async def synthesize_kannada(request: KannadaSynthesizeRequest):
787
  if not tts_manager.model:
 
3
  import os
4
  from time import time
5
  from typing import List
 
6
  import uvicorn
7
  from fastapi import Depends, FastAPI, File, HTTPException, Query, Request, UploadFile, Body, Form, APIRouter
8
  from fastapi.middleware.cors import CORSMiddleware
 
340
  logger.info(f"Synthesizing speech for text: {text[:50]}... with ref_audio: {ref_audio_name}")
341
  sample_rate, audio_data = load_audio_from_url(ref_audio_url)
342
 
343
+ # Use in-memory buffer instead of temporary file
344
+ ref_audio_buffer = io.BytesIO()
345
+ sf.write(ref_audio_buffer, audio_data, sample_rate, format='WAV')
346
+ ref_audio_buffer.seek(0)
 
347
 
348
+ # Run synthesis with in-memory buffer
349
+ audio = await asyncio.to_thread(tts_manager.synthesize, text, ref_audio_buffer, ref_text)
350
+
351
+ # Ensure buffer is closed
352
+ ref_audio_buffer.close()
353
 
354
  if audio.dtype == np.int16:
355
  audio = audio.astype(np.float32) / 32768.0
356
+ output_buffer = io.BytesIO()
357
+ sf.write(output_buffer, audio, 24000, format='WAV')
358
+ output_buffer.seek(0)
359
  logger.info("Speech synthesis completed")
360
+ return output_buffer
361
 
362
  # Supported languages
363
  SUPPORTED_LANGUAGES = {
 
782
 
783
  app.include_router(llm_router)
784
 
785
+ # Improved Endpoints with GPU Optimization and In-Memory Audio
786
  @app.post("/audio/speech", response_class=StreamingResponse)
787
  async def synthesize_kannada(request: KannadaSynthesizeRequest):
788
  if not tts_manager.model: