Spaces:
Running
Running
orztv
committed on
Commit
·
76374ce
1
Parent(s):
37635f4
update
Browse files- Dockerfile +10 -3
- src/openai-edge-tts.sh +21 -0
- src/openai-edge-tts/.env.example +10 -0
- src/openai-edge-tts/Dockerfile +11 -0
- src/openai-edge-tts/app/server.py +75 -0
- src/openai-edge-tts/app/tts_handler.py +74 -0
- src/openai-edge-tts/app/utils.py +38 -0
- src/openai-edge-tts/requirements.txt +5 -0
- src/startup.sh +16 -5
Dockerfile
CHANGED
@@ -4,6 +4,12 @@ ENV USER=pn \
|
|
4 |
HOMEDIR=/home/pn \
|
5 |
PORT=7860 \
|
6 |
REMIX_NAME=remix-app
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
9 |
apt-utils \
|
@@ -17,7 +23,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
17 |
|
18 |
WORKDIR ${HOMEDIR}
|
19 |
|
20 |
-
#
|
21 |
COPY src/* ${HOMEDIR}/
|
22 |
|
23 |
# 给所有 .sh 文件添加执行权限
|
@@ -25,8 +31,9 @@ RUN chmod +x ${HOMEDIR}/*.sh
|
|
25 |
|
26 |
# 运行 setup.sh、sshx.sh 和 remix.sh
|
27 |
RUN ${HOMEDIR}/setup.sh \
|
28 |
-
&& ${HOMEDIR}/remix.sh \
|
29 |
-
&& ${HOMEDIR}/sshx.sh
|
|
|
30 |
|
31 |
# 暴露 Remix 端口
|
32 |
EXPOSE ${PORT}
|
|
|
4 |
HOMEDIR=/home/pn \
|
5 |
PORT=7860 \
|
6 |
REMIX_NAME=remix-app
|
7 |
+
SSHX_INSTALL=true
|
8 |
+
|
9 |
+
# 以下2选1,不然端口冲突
|
10 |
+
ENV OPENAI_EDGE_TTS_INSTALL=true \
|
11 |
+
REMIX_INSTALL=false
|
12 |
+
|
13 |
|
14 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
15 |
apt-utils \
|
|
|
23 |
|
24 |
WORKDIR ${HOMEDIR}
|
25 |
|
26 |
+
# 复制src文件夹下的所有文件夹及文件到工作目录
|
27 |
COPY src/* ${HOMEDIR}/
|
28 |
|
29 |
# 给所有 .sh 文件添加执行权限
|
|
|
31 |
|
32 |
# 运行 setup.sh、sshx.sh 和 remix.sh
|
33 |
RUN ${HOMEDIR}/setup.sh \
|
34 |
+
&& if [ "$REMIX_INSTALL" = true ]; then ${HOMEDIR}/remix.sh; fi \
|
35 |
+
&& if [ "$SSHX_INSTALL" = true ]; then ${HOMEDIR}/sshx.sh; fi \
|
36 |
+
&& if [ "$OPENAI_EDGE_TTS_INSTALL" = true ]; then ${HOMEDIR}/openai-edge-tts.sh; fi
|
37 |
|
38 |
# 暴露 Remix 端口
|
39 |
EXPOSE ${PORT}
|
src/openai-edge-tts.sh
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/sh
# Install the openai-edge-tts service into $HOMEDIR/openai-edge-tts:
# copy the sources, install the Python dependencies, and create .env.
set -e

echo "开始安装 openai-edge-tts..."

# Fail fast when HOMEDIR is unset or empty; otherwise the copy below would
# silently target /openai-edge-tts.
: "${HOMEDIR:?HOMEDIR must be set}"

cp -r /tmp/openai-edge-tts "$HOMEDIR/openai-edge-tts"

# Check that the openai-edge-tts directory exists
if [ ! -d "$HOMEDIR/openai-edge-tts" ]; then
    echo "openai-edge-tts文件夹不存在"
    exit 1
fi

# Enter the openai-edge-tts directory and install its dependencies
cd "$HOMEDIR/openai-edge-tts"
pip install -r requirements.txt

# Create the .env file from the example
cp .env.example .env

echo "openai-edge-tts安装完成"
|
src/openai-edge-tts/.env.example
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
API_KEY=your_api_key_here
|
2 |
+
PORT=7860
|
3 |
+
|
4 |
+
DEFAULT_VOICE=en-US-AndrewNeural
|
5 |
+
DEFAULT_RESPONSE_FORMAT=mp3
|
6 |
+
DEFAULT_SPEED=1.0
|
7 |
+
|
8 |
+
DEFAULT_LANGUAGE=en-US
|
9 |
+
|
10 |
+
REQUIRE_API_KEY=True
|
src/openai-edge-tts/Dockerfile
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Dockerfile

FROM python:3.12-slim

# tts_handler.py shells out to ffmpeg for format conversion and speed
# adjustment, and the slim base image does not ship it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install dependencies first so this layer is cached across source changes.
COPY requirements.txt /app
RUN pip install --no-cache-dir -r requirements.txt

COPY app/ /app
CMD ["python", "server.py"]
|
src/openai-edge-tts/app/server.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# server.py
|
2 |
+
|
3 |
+
from flask import Flask, request, send_file, jsonify
|
4 |
+
from gevent.pywsgi import WSGIServer
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
import os
|
7 |
+
|
8 |
+
from tts_handler import generate_speech, get_models, get_voices
|
9 |
+
from utils import require_api_key, AUDIO_FORMAT_MIME_TYPES
|
10 |
+
|
11 |
+
# Flask application object; the route handlers below register on it.
app = Flask(__name__)

# Load .env (if any) before the settings below are read from the environment.
load_dotenv()

API_KEY = os.environ.get('API_KEY', 'your_api_key_here')
PORT = int(os.environ.get('PORT', 5050))

# Fallbacks applied when a speech request omits the corresponding field.
DEFAULT_VOICE = os.environ.get('DEFAULT_VOICE', 'en-US-AndrewNeural')
DEFAULT_RESPONSE_FORMAT = os.environ.get('DEFAULT_RESPONSE_FORMAT', 'mp3')
DEFAULT_SPEED = float(os.environ.get('DEFAULT_SPEED', 1.0))

# DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'tts-1')
|
22 |
+
|
23 |
+
@app.route('/v1/audio/speech', methods=['POST'])
@require_api_key
def text_to_speech():
    """Synthesize speech for the JSON request body and return it as a file.

    Reads ``input`` (required) plus the optional ``voice``,
    ``response_format`` and ``speed`` fields, falling back to the
    module-level defaults for the optional ones.

    Returns:
        The generated audio as an attachment named ``speech.<format>``, or a
        JSON error with status 400 when ``input`` is missing or ``speed`` is
        not a positive finite number.
    """
    import math  # local import: only needed for the speed validation below

    data = request.json
    # A JSON array or scalar has no fields to read; treat it like a missing input.
    if not isinstance(data, dict) or 'input' not in data:
        return jsonify({"error": "Missing 'input' in request body"}), 400

    text = data.get('input')
    # model = data.get('model', DEFAULT_MODEL)
    voice = data.get('voice', DEFAULT_VOICE)
    response_format = data.get('response_format', DEFAULT_RESPONSE_FORMAT)

    # A bad speed is a client error; reject it here instead of letting
    # float() or the later conversion step fail with a 500.
    try:
        speed = float(data.get('speed', DEFAULT_SPEED))
    except (TypeError, ValueError):
        return jsonify({"error": "'speed' must be a number"}), 400
    if not math.isfinite(speed) or speed <= 0:
        return jsonify({"error": "'speed' must be a positive number"}), 400

    # Formats without a known MIME type are served as audio/mpeg.
    mime_type = AUDIO_FORMAT_MIME_TYPES.get(response_format, "audio/mpeg")

    # Generate the audio file in the specified format with speed adjustment
    output_file_path = generate_speech(text, voice, response_format, speed)

    # Return the file with the correct MIME type
    return send_file(
        output_file_path,
        mimetype=mime_type,
        as_attachment=True,
        download_name=f"speech.{response_format}",
    )
|
44 |
+
|
45 |
+
@app.route('/v1/models', methods=['GET', 'POST'])
@require_api_key
def list_models():
    """Return the model descriptors from ``get_models()`` under the ``data`` key."""
    models = get_models()
    payload = {"data": models}
    return jsonify(payload)
|
49 |
+
|
50 |
+
@app.route('/v1/voices', methods=['GET', 'POST'])
@require_api_key
def list_voices():
    """List voices, optionally filtered by ``language`` (or ``locale``).

    The filter is read from the query string on GET and from the JSON body
    on POST; ``language`` takes precedence over ``locale``. Without a filter,
    ``get_voices`` is called with ``None``.

    Returns:
        JSON of the form ``{"voices": [...]}``.
    """
    if request.method == 'GET':
        data = request.args
    else:
        # silent=True: a POST with no (or malformed) JSON body yields None
        # rather than aborting the request, so it lists the default voices.
        data = request.get_json(silent=True)

    # Anything that is not a mapping (None, a JSON array, a scalar) carries no filter.
    if not isinstance(data, dict):
        data = {}

    specific_language = None
    if 'language' in data:
        specific_language = data.get('language')
    elif 'locale' in data:
        specific_language = data.get('locale')

    return jsonify({"voices": get_voices(specific_language)})
|
60 |
+
|
61 |
+
@app.route('/v1/voices/all', methods=['GET', 'POST'])
@require_api_key
def list_all_voices():
    """Return every voice by asking ``get_voices`` for the ``'all'`` language."""
    voices = get_voices('all')
    return jsonify({"voices": voices})
|
65 |
+
|
66 |
+
# Startup banner; emitted whenever this module is loaded.
print("\n".join([
    " Edge TTS (Free Azure TTS) Replacement for OpenAI's TTS API",
    " ",
    " * Serving OpenAI Edge TTS",
    f" * Server running on http://localhost:{PORT}",
    f" * TTS Endpoint: http://localhost:{PORT}/v1/audio/speech",
    " ",
]))

if __name__ == '__main__':
    # Serve the Flask app with gevent's WSGI server on all interfaces.
    WSGIServer(('0.0.0.0', PORT), app).serve_forever()
|
src/openai-edge-tts/app/tts_handler.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tts_handler.py
|
2 |
+
|
3 |
+
import edge_tts
|
4 |
+
import asyncio
|
5 |
+
import tempfile
|
6 |
+
import subprocess
|
7 |
+
import os
|
8 |
+
|
9 |
+
# Locale used to filter voices when the caller does not name one
# (overridable through the DEFAULT_LANGUAGE environment variable).
DEFAULT_LANGUAGE = os.environ.get('DEFAULT_LANGUAGE', 'en-US')

# Translation table from OpenAI voice names to edge-tts voice names.
voice_mapping = dict(
    alloy='en-US-AvaNeural',
    echo='en-US-AndrewNeural',
    fable='en-GB-SoniaNeural',
    onyx='en-US-EricNeural',
    nova='en-US-SteffanNeural',
    shimmer='en-US-EmmaNeural',
)
|
21 |
+
|
22 |
+
def _discard_temp_file(path):
    """Best-effort removal of a temporary file; a file that cannot be removed is left behind."""
    try:
        os.remove(path)
    except OSError:
        # Deliberately ignored: cleanup must never mask the real result or error.
        pass


async def _generate_audio(text, voice, response_format, speed):
    """Synthesize *text* with edge-tts and return the path of the audio file.

    Args:
        text: Text to speak.
        voice: An OpenAI voice name found in ``voice_mapping`` or a voice
            name passed through to edge-tts unchanged.
        response_format: Target format; also used as the output file suffix
            and as ffmpeg's ``-f`` argument.
        speed: Playback speed factor; ``1.0`` means unchanged.

    Returns:
        Path of a temporary file holding the audio. The file is not removed
        by this function; the caller owns it.

    Raises:
        RuntimeError: If the ffmpeg conversion fails or ffmpeg cannot be run.
    """
    # Use the mapping for OpenAI names, otherwise use the voice as-is.
    edge_tts_voice = voice_mapping.get(voice, voice)

    # Reserve a path for the mp3 and close our handle immediately so the
    # file descriptor is not leaked; edge-tts writes to the path itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
        mp3_path = mp3_file.name

    try:
        communicator = edge_tts.Communicate(text, edge_tts_voice)
        await communicator.save(mp3_path)
    except BaseException:
        # Synthesis failed or was cancelled: nothing useful is in the file.
        _discard_temp_file(mp3_path)
        raise

    # If the requested format is mp3 and speed is 1.0, return the generated file directly
    if response_format == "mp3" and speed == 1.0:
        return mp3_path

    converted_path = None
    try:
        # Convert to the requested format if not mp3 or if speed adjustment is needed
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{response_format}") as converted_file:
            converted_path = converted_file.name

        # ffmpeg playback speed adjustment
        if response_format != "pcm":
            speed_filter = f"atempo={speed}"
        else:
            speed_filter = f"asetrate=44100*{speed},aresample=44100"

        # NOTE(review): response_format is handed to ffmpeg verbatim as the
        # muxer name; confirm every supported format is a valid "-f" value.
        ffmpeg_command = [
            "ffmpeg", "-i", mp3_path,
            "-filter:a", speed_filter,  # Apply speed adjustment
            "-f", response_format, "-y",
            converted_path,
        ]
        subprocess.run(ffmpeg_command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable ffmpeg binary.
        if converted_path is not None:
            _discard_temp_file(converted_path)
        raise RuntimeError(f"Error in audio conversion: {e}") from e
    finally:
        # The intermediate mp3 is no longer needed once conversion was attempted.
        _discard_temp_file(mp3_path)

    return converted_path
|
53 |
+
|
54 |
+
def generate_speech(text, voice, response_format, speed=1.0):
    """Run ``_generate_audio`` to completion on a fresh event loop and return its result."""
    pending = _generate_audio(text, voice, response_format, speed)
    return asyncio.run(pending)
|
56 |
+
|
57 |
+
def get_models():
    """Return the model descriptors as a list of ``{"id", "name"}`` dicts."""
    catalogue = (
        ("tts-1", "Text-to-speech v1"),
        ("tts-1-hd", "Text-to-speech v1 HD"),
    )
    return [{"id": model_id, "name": label} for model_id, label in catalogue]
|
62 |
+
|
63 |
+
async def _get_voices(language=None):
    """Fetch the edge-tts voice list and keep the entries for one locale.

    A falsy *language* is replaced by ``DEFAULT_LANGUAGE``; the value
    ``'all'`` disables filtering. Each result is a dict with ``name``,
    ``gender`` and ``language`` keys taken from the voice's ``ShortName``,
    ``Gender`` and ``Locale`` fields.
    """
    all_voices = await edge_tts.list_voices()

    # Use default if no language specified
    wanted = language if language else DEFAULT_LANGUAGE
    keep_everything = wanted == 'all' or wanted is None

    selected = []
    for entry in all_voices:
        if keep_everything or entry['Locale'] == wanted:
            selected.append({
                "name": entry['ShortName'],
                "gender": entry['Gender'],
                "language": entry['Locale'],
            })
    return selected
|
72 |
+
|
73 |
+
def get_voices(language=None):
    """Run ``_get_voices`` to completion on a fresh event loop and return its result."""
    pending = _get_voices(language)
    return asyncio.run(pending)
|
src/openai-edge-tts/app/utils.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# utils.py
|
2 |
+
|
3 |
+
from flask import request, jsonify
|
4 |
+
from functools import wraps
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
load_dotenv()
|
9 |
+
|
10 |
+
def getenv_bool(name: str, default: bool = False) -> bool:
    """Read environment variable *name* as a boolean.

    The value is true when, ignoring case and surrounding whitespace, it is
    one of ``yes``, ``y``, ``true``, ``1`` or ``t``; any other value is
    false. When the variable is unset, *default* is returned.
    """
    raw = os.getenv(name, str(default))
    # Strip first: a stray space or carriage return (e.g. from a CRLF .env
    # file) would otherwise turn "True" into False without any warning.
    return raw.strip().lower() in ("yes", "y", "true", "1", "t")
|
12 |
+
|
13 |
+
# Bearer token that clients must present; when API_KEY is unset this is the
# placeholder string below.
API_KEY = os.environ.get('API_KEY', 'your_api_key_here')

# Whether the key check is enforced; defaults to True when REQUIRE_API_KEY is unset.
REQUIRE_API_KEY = getenv_bool('REQUIRE_API_KEY', default=True)
|
15 |
+
|
16 |
+
def require_api_key(f):
    """Decorator that rejects requests lacking the configured bearer token.

    When ``REQUIRE_API_KEY`` is false the wrapped view runs unconditionally.
    Otherwise the request must carry ``Authorization: Bearer <API_KEY>``;
    a missing or malformed header, or a wrong token, yields a JSON error
    with status 401.
    """
    import hmac  # local import: used only for the constant-time comparison

    scheme = 'Bearer '

    @wraps(f)
    def decorated_function(*args, **kwargs):
        if not REQUIRE_API_KEY:
            return f(*args, **kwargs)

        auth_header = request.headers.get('Authorization')
        if not auth_header or not auth_header.startswith(scheme):
            return jsonify({"error": "Missing or invalid API key"}), 401

        # Take everything after the scheme prefix, so a token that itself
        # contains "Bearer " is not truncated.
        token = auth_header[len(scheme):]

        # Compare as bytes in constant time so response timing does not
        # reveal how much of the key matched (bytes also allow non-ASCII).
        if not hmac.compare_digest(token.encode('utf-8'), API_KEY.encode('utf-8')):
            return jsonify({"error": "Invalid API key"}), 401

        return f(*args, **kwargs)

    return decorated_function
|
29 |
+
|
30 |
+
# Content type served for each supported response format name.
AUDIO_FORMAT_MIME_TYPES = dict(
    mp3="audio/mpeg",
    opus="audio/ogg",
    aac="audio/aac",
    flac="audio/flac",
    wav="audio/wav",
    pcm="audio/L16",
)
|
src/openai-edge-tts/requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
flask
|
2 |
+
gevent
|
3 |
+
python-dotenv
|
4 |
+
edge-tts
|
5 |
+
art
|
src/startup.sh
CHANGED
@@ -1,11 +1,22 @@
|
|
1 |
#!/bin/sh
|
2 |
|
3 |
-
#
|
4 |
-
|
5 |
-
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
# 只输出日志
|
9 |
-
pm2 logs
|
10 |
|
11 |
cd ${HOMEDIR}
|
|
|
1 |
#!/bin/sh

# Start Remix only when REMIX_INSTALL is true
if [ "$REMIX_INSTALL" = true ]; then
    # Start the Remix app under PM2 from its own directory; stop if the
    # directory is missing rather than launching from the wrong place.
    cd "${HOMEDIR}/${REMIX_NAME}" || exit 1
    pm2 start ecosystem.config.cjs
    pm2 save
fi

# Check whether SSHX_INSTALL is true

# Start openai-edge-tts only when OPENAI_EDGE_TTS_INSTALL is true
if [ "$OPENAI_EDGE_TTS_INSTALL" = true ]; then
    # Pass the script as the single app to start and select Python via
    # --interpreter; "pm2 start python <file>" would pass two script arguments.
    pm2 start "${HOMEDIR}/openai-edge-tts/app/server.py" --interpreter python --name openai-edge-tts
    pm2 save
fi

# Only output logs
pm2 logs --lines 50

cd "${HOMEDIR}"
|