Nasma committed on
Commit
d26a5fd
·
verified ·
1 Parent(s): b0854df

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +27 -25
main.py CHANGED
@@ -6,6 +6,10 @@ from io import BytesIO
6
  from typing import Generator
7
 
8
  app = FastAPI()
 
 
 
 
9
 
10
  # Initialize the TTS model
11
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False) # Set gpu=True if you have GPU support
@@ -18,24 +22,7 @@ def split_text(text: str, words_per_chunk: int = 20):
18
  words = text.split()
19
  return [' '.join(words[i:i + words_per_chunk]) for i in range(0, len(words), words_per_chunk)]
20
 
21
- # Function to generate audio chunks
22
- def generate_audio_chunks(text: str, language: str, chunk_size: int = 20) -> Generator[bytes, None, None]:
23
- if tts.is_multi_lingual and not language:
24
- raise ValueError("Language must be specified for multi-lingual models.")
25
-
26
- text_chunks = split_text(text, chunk_size)
27
-
28
- for idx, chunk in enumerate(text_chunks):
29
- # Generate audio for each chunk and yield as bytes
30
- audio_buffer = BytesIO()
31
- tts.tts_to_file(
32
- text=chunk,
33
- file_path=audio_buffer,
34
- speaker_wav=FIXED_SPEAKER_WAV,
35
- language=language
36
- )
37
- audio_buffer.seek(0)
38
- yield audio_buffer.read()
39
 
40
  @app.post("/generate-audio/")
41
  async def generate_audio(
@@ -47,10 +34,25 @@ async def generate_audio(
47
 
48
  # StreamingResponse to stream audio chunks
49
  def audio_stream():
50
- try:
51
- for audio_chunk in generate_audio_chunks(text=text, language=language):
52
- yield audio_chunk
53
- except Exception as e:
54
- raise HTTPException(status_code=500, detail=str(e))
55
-
56
- return StreamingResponse(audio_stream(), media_type="audio/wav")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from typing import Generator
7
 
8
  app = FastAPI()
9
+ import os
10
+
11
+ # By using XTTS you agree to CPML license https://coqui.ai/cpml
12
+ os.environ["COQUI_TOS_AGREED"] = "1"
13
 
14
  # Initialize the TTS model
15
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False) # Set gpu=True if you have GPU support
 
22
  words = text.split()
23
  return [' '.join(words[i:i + words_per_chunk]) for i in range(0, len(words), words_per_chunk)]
24
 
25
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  @app.post("/generate-audio/")
28
  async def generate_audio(
 
34
 
35
  # StreamingResponse to stream audio chunks
36
  def audio_stream():
37
+ if tts.is_multi_lingual and not language:
38
+ raise ValueError("Language must be specified for multi-lingual models.")
39
+
40
+ text_chunks = split_text(text, 20)
41
+
42
+ for idx, chunk in enumerate(text_chunks):
43
+ # Generate audio for each chunk and yield as bytes
44
+ output_file = f"out_{idx}.wav"
45
+ tts.tts_to_file(
46
+ text=chunk,
47
+ file_path=output_file,
48
+ speaker_wav=FIXED_SPEAKER_WAV,
49
+ language=language
50
+ )
51
+ print(output_file)
52
+ # Read the file content and yield as binary
53
+ with open(output_file, "rb") as audio_file:
54
+ yield audio_file.read()
55
+ # Optionally delete the file after streaming
56
+ os.remove(output_file)
57
+
58
+ return StreamingResponse(audio_stream(), media_type="audio/wav")