Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -6,6 +6,10 @@ from io import BytesIO
|
|
6 |
from typing import Generator
|
7 |
|
8 |
app = FastAPI()
|
|
|
|
|
|
|
|
|
9 |
|
10 |
# Initialize the TTS model
|
11 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False) # Set gpu=True if you have GPU support
|
def split_text(text: str, words_per_chunk: int = 20):
    """Break *text* into whitespace-delimited chunks.

    Each chunk contains at most *words_per_chunk* words, joined back with
    single spaces. An empty/whitespace-only *text* yields an empty list.
    """
    tokens = text.split()
    chunks = []
    for start in range(0, len(tokens), words_per_chunk):
        chunks.append(' '.join(tokens[start:start + words_per_chunk]))
    return chunks
|
20 |
|
21 |
def generate_audio_chunks(text: str, language: str, chunk_size: int = 20) -> Generator[bytes, None, None]:
    """Yield synthesized speech for *text*, one ~*chunk_size*-word chunk at a time.

    Raises ValueError when the loaded model is multi-lingual and *language*
    is empty/None.
    """
    if tts.is_multi_lingual and not language:
        raise ValueError("Language must be specified for multi-lingual models.")

    for chunk in split_text(text, chunk_size):
        # Synthesize this chunk into an in-memory buffer and hand back the bytes.
        # NOTE(review): tts_to_file may expect a filesystem path for
        # file_path — confirm a BytesIO is actually accepted here.
        buffer = BytesIO()
        tts.tts_to_file(
            text=chunk,
            file_path=buffer,
            speaker_wav=FIXED_SPEAKER_WAV,
            language=language
        )
        buffer.seek(0)
        yield buffer.read()
|
39 |
|
40 |
@app.post("/generate-audio/")
|
41 |
async def generate_audio(
|
@@ -47,10 +34,25 @@ async def generate_audio(
|
|
47 |
|
48 |
# StreamingResponse to stream audio chunks
|
49 |
def audio_stream():
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from typing import Generator

import os

# By using XTTS you agree to CPML license https://coqui.ai/cpml
# Must be set before the model is constructed below, otherwise the
# download step prompts for interactive TOS agreement.
os.environ["COQUI_TOS_AGREED"] = "1"

app = FastAPI()

# Initialize the TTS model
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)  # Set gpu=True if you have GPU support
|
|
|
22 |
words = text.split()
|
23 |
return [' '.join(words[i:i + words_per_chunk]) for i in range(0, len(words), words_per_chunk)]
|
24 |
|
25 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
@app.post("/generate-audio/")
|
28 |
async def generate_audio(
|
|
|
34 |
|
35 |
# StreamingResponse to stream audio chunks
|
36 |
def audio_stream():
|
37 |
+
if tts.is_multi_lingual and not language:
|
38 |
+
raise ValueError("Language must be specified for multi-lingual models.")
|
39 |
+
|
40 |
+
text_chunks = split_text(text, 20)
|
41 |
+
|
42 |
+
for idx, chunk in enumerate(text_chunks):
|
43 |
+
# Generate audio for each chunk and yield as bytes
|
44 |
+
output_file = f"out_{idx}.wav"
|
45 |
+
tts.tts_to_file(
|
46 |
+
text=chunk,
|
47 |
+
file_path=output_file,
|
48 |
+
speaker_wav=FIXED_SPEAKER_WAV,
|
49 |
+
language=language
|
50 |
+
)
|
51 |
+
print(output_file)
|
52 |
+
# Read the file content and yield as binary
|
53 |
+
with open(output_file, "rb") as audio_file:
|
54 |
+
yield audio_file.read()
|
55 |
+
# Optionally delete the file after streaming
|
56 |
+
os.remove(output_file)
|
57 |
+
|
58 |
+
return StreamingResponse(audio_stream(), media_type="audio/wav")
|