Nasma committed
Commit 5537343 · verified · Parent: e32f010

Update main.py

Files changed (1): main.py (+9, -9)
main.py CHANGED
@@ -10,15 +10,16 @@ app = FastAPI()
 # Initialize the TTS model
 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)  # Set gpu=True if you have GPU support
 
+# Predefined path to the sample voice clone
+FIXED_SPEAKER_WAV = "C:/Users/nasma/OneDrive/Desktop/voiceclone/voicecloneapi/Bible Verses About Community.wav"
+
 # Function to split text into chunks
 def split_text(text: str, words_per_chunk: int = 20):
     words = text.split()
     return [' '.join(words[i:i + words_per_chunk]) for i in range(0, len(words), words_per_chunk)]
 
 # Function to generate audio chunks
-def generate_audio_chunks(
-    text: str, speaker_wav: str, language: str, chunk_size: int = 20
-) -> Generator[bytes, None, None]:
+def generate_audio_chunks(text: str, language: str, chunk_size: int = 20) -> Generator[bytes, None, None]:
     if tts.is_multi_lingual and not language:
         raise ValueError("Language must be specified for multi-lingual models.")
 
@@ -30,7 +31,7 @@ def generate_audio_chunks(
         tts.tts_to_file(
             text=chunk,
             file_path=audio_buffer,
-            speaker_wav=speaker_wav,
+            speaker_wav=FIXED_SPEAKER_WAV,
             language=language
         )
         audio_buffer.seek(0)
@@ -39,16 +40,15 @@ def generate_audio_chunks(
 @app.post("/generate-audio/")
 async def generate_audio(
     text: str = Query(..., description="The input text to convert to speech."),
-    language: str = Query("en", description="Language code for TTS (e.g., 'en' for English)."),
-    speaker_wav: str = Query(..., description="Path to the WAV file for voice cloning.")
+    language: str = Query("en", description="Language code for TTS (e.g., 'en' for English).")
 ):
-    if not os.path.exists(speaker_wav):
-        raise HTTPException(status_code=400, detail="Speaker WAV file not found.")
+    if not os.path.exists(FIXED_SPEAKER_WAV):
+        raise HTTPException(status_code=400, detail="Fixed speaker WAV file not found.")
 
     # StreamingResponse to stream audio chunks
     def audio_stream():
         try:
-            for audio_chunk in generate_audio_chunks(text=text, speaker_wav=speaker_wav, language=language):
+            for audio_chunk in generate_audio_chunks(text=text, language=language):
                 yield audio_chunk
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
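
For context, this commit pins voice cloning to the server-side FIXED_SPEAKER_WAV file, so clients now send only the text and language query parameters. Below is a minimal, hypothetical client sketch for exercising the updated endpoint; the base URL http://localhost:8000 and the output filename are assumptions, while the /generate-audio/ route and its parameters come from the diff above.

```python
# Hypothetical client sketch for the updated endpoint after this commit.
# Assumptions: the API runs at http://localhost:8000 and the streamed bytes
# are saved to output.wav; only "text" and "language" are sent, since the
# speaker_wav parameter was removed in favor of the fixed server-side file.
import requests

params = {
    "text": "Hello from the voice cloning API.",  # required query parameter
    "language": "en",                              # server default is "en"
}

with requests.post("http://localhost:8000/generate-audio/", params=params, stream=True) as resp:
    resp.raise_for_status()
    with open("output.wav", "wb") as f:
        # The endpoint streams audio chunk by chunk, so write bytes as they arrive.
        for chunk in resp.iter_content(chunk_size=8192):
            f.write(chunk)
```

Because the response is streamed, iter_content writes audio to disk as it arrives instead of buffering the whole clip in memory.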