File size: 2,747 Bytes
84b088a
 
b3e5fcf
84b088a
 
 
 
 
22b3f6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84b088a
7e0ec98
 
 
 
84b088a
 
 
 
5537343
7859e04
5537343
84b088a
 
 
 
 
35f9a40
84b088a
 
 
 
b3e5fcf
 
 
 
 
 
 
35f9a40
b3e5fcf
 
 
 
 
75f99e0
35f9a40
 
b3e5fcf
 
 
68a6d75
b3e5fcf
 
 
 
68a6d75
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from fastapi import FastAPI, Query, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.responses import FileResponse
from TTS.api import TTS
import os
from io import BytesIO
from typing import Generator


import importlib
import sys

# Startup diagnostics: report the interpreter version and the version of each
# runtime dependency.
#
# Modules are loaded through importlib rather than ``import <name>`` so that
# no module-level name is (re)bound here. A plain ``import TTS`` would replace
# the ``TTS`` class imported from ``TTS.api`` with the ``TTS`` package, making
# the later ``TTS(...)`` model construction fail with
# "'module' object is not callable".
print("Python version:", sys.version)

for _package_name in (
    "fastapi",
    "uvicorn",
    "TTS",
    "numpy",
    "torch",
    "torchaudio",
    "scipy",
    "typing_extensions",
):
    try:
        _package = importlib.import_module(_package_name)
    except ImportError as e:
        # Report and keep going so one missing package does not hide the rest.
        # ``e.name`` may be a transitive dependency, or None for some errors.
        print("Missing package:", e.name or _package_name)
        continue

    # Some packages are only checked for presence and may not expose
    # ``__version__``; print a version line only when one is available.
    _version = getattr(_package, "__version__", None)
    if _version is not None:
        print(f"{_package_name} version:", _version)



# ASGI application object; the route decorator further down registers its
# handler on it.
app = FastAPI()
import os

# By using XTTS you agree to the CPML license: https://coqui.ai/cpml
# The flag is set before the model is constructed below; presumably the TTS
# library reads it to skip its interactive licence prompt - TODO confirm.
os.environ["COQUI_TOS_AGREED"] = "1"

# Initialize the TTS model once at import time; the request handler reuses
# this single module-level instance.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)  # Set gpu=True if you have GPU support

# Predefined path to the sample voice clone, passed as ``speaker_wav`` when
# synthesizing. The path is relative, so it is resolved against the process's
# current working directory.
FIXED_SPEAKER_WAV = "Bible Verses About Community.wav"

def split_text(text: str, words_per_chunk: int = 20) -> list:
    """Split ``text`` into chunks of at most ``words_per_chunk`` words.

    Words are delimited by whitespace and each chunk is re-joined with single
    spaces, so runs of whitespace in the input are collapsed.

    Args:
        text: The text to split.
        words_per_chunk: Maximum number of words per chunk; must be >= 1.

    Returns:
        The chunks as strings, in their original order. The last chunk may
        hold fewer words. An empty list is returned when ``text`` contains
        no words.

    Raises:
        ValueError: If ``words_per_chunk`` is less than 1.
    """
    # A zero step makes range() fail with an unrelated message, and a negative
    # step yields no chunks at all (silently dropping the text), so reject
    # both explicitly.
    if words_per_chunk < 1:
        raise ValueError(
            f"words_per_chunk must be a positive integer, got {words_per_chunk!r}"
        )

    words = text.split()
    return [
        " ".join(words[start:start + words_per_chunk])
        for start in range(0, len(words), words_per_chunk)
    ]



def _remove_file(path: str) -> None:
    """Delete ``path``, ignoring the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/generate-audio/")
async def generate_audio(
    text: str = Query(..., description="The input text to convert to speech."),
    language: str = Query("en", description="Language code for TTS (e.g., 'en' for English)."),
):
    """Synthesize ``text`` in the fixed cloned voice and return it as a WAV file.

    Args:
        text: The text to convert to speech; must contain non-whitespace
            characters.
        language: Language code handed to the TTS model.

    Returns:
        A ``FileResponse`` with media type ``audio/wav``. The file behind it
        is a per-request temporary file that is deleted once the response has
        been sent.

    Raises:
        HTTPException: 400 when the speaker sample is missing, ``text`` is
            blank, or the model is multi-lingual and no language was given;
            500 when synthesis fails.
    """
    # Function-scope imports keep this handler self-contained.
    import tempfile

    from fastapi.background import BackgroundTasks

    if not os.path.exists(FIXED_SPEAKER_WAV):
        raise HTTPException(status_code=400, detail="Fixed speaker WAV file not found.")

    if not text.strip():
        raise HTTPException(status_code=400, detail="Text must not be empty.")

    # A bad request parameter is a client error, so answer with a 400 instead
    # of letting a bare ValueError surface as a 500.
    if tts.is_multi_lingual and not language:
        raise HTTPException(
            status_code=400,
            detail="Language must be specified for multi-lingual models.",
        )

    # Each request writes to its own file. A shared fixed name would let a
    # later request overwrite audio that is still being streamed to an
    # earlier client.
    fd, output_file = tempfile.mkstemp(prefix="tts_", suffix=".wav")
    os.close(fd)

    # NOTE(review): this call is synchronous, so the event loop is blocked
    # while it runs. It is left in place because whether the model tolerates
    # concurrent calls is not visible from this file.
    try:
        tts.tts_to_file(
            text=text,
            file_path=output_file,
            speaker_wav=FIXED_SPEAKER_WAV,
            language=language,
        )
    except Exception as e:
        _remove_file(output_file)
        raise HTTPException(status_code=500, detail=f"Error generating audio: {str(e)}") from e

    # Delete the temporary file only after the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_file, output_file)
    return FileResponse(output_file, media_type="audio/wav", background=cleanup)