orztv committed on
Commit
76374ce
·
1 Parent(s): 37635f4
Dockerfile CHANGED
@@ -4,6 +4,12 @@ ENV USER=pn \
4
  HOMEDIR=/home/pn \
5
  PORT=7860 \
6
  REMIX_NAME=remix-app
 
 
 
 
 
 
7
 
8
  RUN apt-get update && apt-get install -y --no-install-recommends \
9
  apt-utils \
@@ -17,7 +23,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
17
 
18
  WORKDIR ${HOMEDIR}
19
 
20
- # 复制所有脚本文件到工作目录
21
  COPY src/* ${HOMEDIR}/
22
 
23
  # 给所有 .sh 文件添加执行权限
@@ -25,8 +31,9 @@ RUN chmod +x ${HOMEDIR}/*.sh
25
 
26
  # 运行 setup.sh、sshx.sh 和 remix.sh
27
  RUN ${HOMEDIR}/setup.sh \
28
- && ${HOMEDIR}/remix.sh \
29
- && ${HOMEDIR}/sshx.sh
 
30
 
31
  # 暴露 Remix 端口
32
  EXPOSE ${PORT}
 
4
  HOMEDIR=/home/pn \
5
  PORT=7860 \
6
  REMIX_NAME=remix-app
7
# Toggle for the sshx install step. The ENV instruction above ends without a
# line continuation, so this variable needs an ENV instruction of its own.
ENV SSHX_INSTALL=true

# Enable only ONE of the following two; both would bind the same port.
ENV OPENAI_EDGE_TTS_INSTALL=true \
    REMIX_INSTALL=false
12
+
13
 
14
  RUN apt-get update && apt-get install -y --no-install-recommends \
15
  apt-utils \
 
23
 
24
  WORKDIR ${HOMEDIR}
25
 
26
# Copy everything under src/ into the working directory, keeping its
# sub-directories intact. With a wildcard source such as src/*, COPY copies the
# contents of each matched directory rather than the directory itself, which
# would flatten src/openai-edge-tts/ into ${HOMEDIR}/.
COPY src/ ${HOMEDIR}/
28
 
29
  # 给所有 .sh 文件添加执行权限
 
31
 
32
  # 运行 setup.sh、sshx.sh 和 remix.sh
33
  RUN ${HOMEDIR}/setup.sh \
34
+ && if [ "$REMIX_INSTALL" = true ]; then ${HOMEDIR}/remix.sh; fi \
35
+ && if [ "$SSHX_INSTALL" = true ]; then ${HOMEDIR}/sshx.sh; fi \
36
+ && if [ "$OPENAI_EDGE_TTS_INSTALL" = true ]; then ${HOMEDIR}/openai-edge-tts.sh; fi
37
 
38
  # 暴露 Remix 端口
39
  EXPOSE ${PORT}
src/openai-edge-tts.sh ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/sh
# Build-time installer for the openai-edge-tts server: puts the sources under
# $HOMEDIR/openai-edge-tts, installs the Python dependencies and seeds .env.
set -e

echo "开始安装 openai-edge-tts..."

APP_DIR="$HOMEDIR/openai-edge-tts"

# Stage the sources from /tmp only when they are not already in place, so an
# existing $APP_DIR is not nested inside itself by "cp -r". A missing /tmp copy
# is reported by the directory check below instead of as a raw cp failure.
if [ ! -d "$APP_DIR" ] && [ -d /tmp/openai-edge-tts ]; then
    cp -r /tmp/openai-edge-tts "$APP_DIR"
fi

# Abort the build when the openai-edge-tts directory is not available.
if [ ! -d "$APP_DIR" ]; then
    echo "openai-edge-tts文件夹不存在"
    exit 1
fi

# Enter the openai-edge-tts directory and install its dependencies.
cd "$APP_DIR"
pip install -r requirements.txt

# Create the .env file from the bundled example.
cp .env.example .env

echo "openai-edge-tts安装完成"
src/openai-edge-tts/.env.example ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ API_KEY=your_api_key_here
2
+ PORT=7860
3
+
4
+ DEFAULT_VOICE=en-US-AndrewNeural
5
+ DEFAULT_RESPONSE_FORMAT=mp3
6
+ DEFAULT_SPEED=1.0
7
+
8
+ DEFAULT_LANGUAGE=en-US
9
+
10
+ REQUIRE_API_KEY=True
src/openai-edge-tts/Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dockerfile
# Standalone image for the openai-edge-tts Flask server.

FROM python:3.12-slim

# app/tts_handler.py invokes ffmpeg for every response format other than mp3
# and for any speed other than 1.0; the slim base image does not ship it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install dependencies before copying the sources so this layer stays cached
# when only application code changes.
COPY requirements.txt /app
RUN pip install --no-cache-dir -r requirements.txt

COPY app/ /app
CMD ["python", "server.py"]
src/openai-edge-tts/app/server.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # server.py
2
+
3
+ from flask import Flask, request, send_file, jsonify
4
+ from gevent.pywsgi import WSGIServer
5
+ from dotenv import load_dotenv
6
+ import os
7
+
8
+ from tts_handler import generate_speech, get_models, get_voices
9
+ from utils import require_api_key, AUDIO_FORMAT_MIME_TYPES
10
+
11
# Flask application object that the route decorators in this module register on.
app = Flask(__name__)
# Load variables from a .env file (if one is found) before the settings are read.
load_dotenv()

# NOTE(review): API_KEY is not referenced by the code visible in this module;
# the request check is done by utils.require_api_key -- confirm it is still needed.
API_KEY = os.getenv('API_KEY', 'your_api_key_here')
# TCP port the server binds to; 5050 when PORT is unset.
PORT = int(os.getenv('PORT', 5050))

# Fallbacks applied when a speech request omits the corresponding field.
DEFAULT_VOICE = os.getenv('DEFAULT_VOICE', 'en-US-AndrewNeural')
DEFAULT_RESPONSE_FORMAT = os.getenv('DEFAULT_RESPONSE_FORMAT', 'mp3')
DEFAULT_SPEED = float(os.getenv('DEFAULT_SPEED', 1.0))
20
+
21
+ # DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'tts-1')
22
+
23
@app.route('/v1/audio/speech', methods=['POST'])
@require_api_key
def text_to_speech():
    """Synthesize speech for the posted text and return it as an audio file.

    Expects a JSON object with a required ``input`` field and optional
    ``voice``, ``response_format`` and ``speed`` fields; omitted optional
    fields fall back to the module defaults. A ``model`` field is not read.

    Returns:
        The audio as an attachment named ``speech.<response_format>``, or a
        JSON error with status 400 when the request is malformed.
    """
    # silent=True turns a missing or non-JSON body into None so that it is
    # reported through the 400 below instead of a framework error response.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'input' not in data:
        return jsonify({"error": "Missing 'input' in request body"}), 400

    text = data.get('input')
    voice = data.get('voice', DEFAULT_VOICE)
    response_format = data.get('response_format', DEFAULT_RESPONSE_FORMAT)

    # The format is client-supplied and ends up in a temp-file suffix and on
    # the ffmpeg command line, so only the known formats are accepted.
    if not isinstance(response_format, str) or response_format not in AUDIO_FORMAT_MIME_TYPES:
        return jsonify({"error": "Unsupported 'response_format'"}), 400

    try:
        speed = float(data.get('speed', DEFAULT_SPEED))
    except (TypeError, ValueError):
        return jsonify({"error": "'speed' must be a number"}), 400

    mime_type = AUDIO_FORMAT_MIME_TYPES[response_format]

    # Generate the audio file in the requested format with speed adjustment.
    output_file_path = generate_speech(text, voice, response_format, speed)

    response = send_file(
        output_file_path,
        mimetype=mime_type,
        as_attachment=True,
        download_name=f"speech.{response_format}",
    )

    def _discard_output():
        # Best-effort cleanup: the file is a per-request temp file that nothing
        # else removes, and a failed delete must not break the response.
        try:
            os.remove(output_file_path)
        except OSError:
            pass

    # Delete the temp file once the response has been closed.
    response.call_on_close(_discard_output)
    return response
44
+
45
@app.route('/v1/models', methods=['GET', 'POST'])
@require_api_key
def list_models():
    """Return the available model descriptors under the ``data`` key."""
    models = get_models()
    return jsonify({"data": models})
49
+
50
@app.route('/v1/voices', methods=['GET', 'POST'])
@require_api_key
def list_voices():
    """Return the voices for one language under the ``voices`` key.

    The language is read from ``language`` (preferred) or ``locale``, taken
    from the query string on GET and from the JSON body on POST. When neither
    is given, ``None`` is passed to ``get_voices``.
    """
    if request.method == 'GET':
        params = request.args
    else:
        # silent=True: a POST without a JSON body is treated as "no filter"
        # rather than rejected; a non-object JSON body is treated the same.
        body = request.get_json(silent=True)
        params = body if isinstance(body, dict) else {}

    # 'language' wins whenever the key is present; otherwise fall back to
    # 'locale', and to None when neither key was sent.
    specific_language = params.get('language', params.get('locale'))

    return jsonify({"voices": get_voices(specific_language)})
60
+
61
@app.route('/v1/voices/all', methods=['GET', 'POST'])
@require_api_key
def list_all_voices():
    """Return every voice, regardless of language, under the ``voices`` key."""
    every_voice = get_voices('all')
    return jsonify({"voices": every_voice})
65
+
66
# Startup banner, printed whenever this module is loaded.
_BANNER_LINES = (
    " Edge TTS (Free Azure TTS) Replacement for OpenAI's TTS API",
    " ",
    " * Serving OpenAI Edge TTS",
    f" * Server running on http://localhost:{PORT}",
    f" * TTS Endpoint: http://localhost:{PORT}/v1/audio/speech",
    " ",
)
for _banner_line in _BANNER_LINES:
    print(_banner_line)

if __name__ == '__main__':
    # Serve the Flask app with gevent's WSGI server on all interfaces.
    listen_address = ('0.0.0.0', PORT)
    http_server = WSGIServer(listen_address, app)
    http_server.serve_forever()
src/openai-edge-tts/app/tts_handler.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tts_handler.py
2
+
3
+ import edge_tts
4
+ import asyncio
5
+ import tempfile
6
+ import subprocess
7
+ import os
8
+
9
+ # Language default (environment variable)
10
+ DEFAULT_LANGUAGE = os.getenv('DEFAULT_LANGUAGE', 'en-US')
11
+
12
+ # OpenAI voice names mapped to edge-tts equivalents
13
# OpenAI voice names (keys) and the edge-tts voice each one is served with.
voice_mapping = dict(
    alloy='en-US-AvaNeural',
    echo='en-US-AndrewNeural',
    fable='en-GB-SoniaNeural',
    onyx='en-US-EricNeural',
    nova='en-US-SteffanNeural',
    shimmer='en-US-EmmaNeural',
)
21
+
22
# ffmpeg output-format ("-f") names for the response formats whose own name is
# not an ffmpeg muxer: raw AAC is written as "adts" and headerless 16-bit PCM
# as "s16le". Every other format is passed to ffmpeg under its own name.
_FFMPEG_OUTPUT_FORMATS = {"aac": "adts", "pcm": "s16le"}


def _remove_quietly(path):
    """Delete *path* as best-effort cleanup; a failed delete is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


async def _generate_audio(text, voice, response_format, speed):
    """Synthesize *text* with edge-tts and return the path of the audio file.

    Args:
        text: The text to speak.
        voice: An OpenAI voice name from ``voice_mapping`` or an edge-tts
            voice name, which is used as-is.
        response_format: Requested audio format; anything other than ``mp3``
            is produced by converting the mp3 with ffmpeg.
        speed: Playback speed factor; any value other than 1.0 also goes
            through ffmpeg.

    Returns:
        Path of a temporary file holding the audio. The file is not deleted
        here; removing it is left to the caller.

    Raises:
        RuntimeError: If ffmpeg is missing or exits with an error.
    """
    # Use the mapping for OpenAI names, otherwise treat the name as edge-tts's own.
    edge_tts_voice = voice_mapping.get(voice, voice)

    # Only the path is needed; close the handle straight away so no descriptor
    # stays open for the lifetime of the process.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
        mp3_path = mp3_file.name
    try:
        await edge_tts.Communicate(text, edge_tts_voice).save(mp3_path)
    except BaseException:
        _remove_quietly(mp3_path)
        raise

    # mp3 at normal speed needs no conversion.
    if response_format == "mp3" and speed == 1.0:
        return mp3_path

    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{response_format}") as converted_file:
            converted_path = converted_file.name

        # NOTE(review): the pcm branch assumes a 44100 Hz source for asetrate --
        # confirm against the sample rate edge-tts actually produces.
        if response_format != "pcm":
            speed_filter = f"atempo={speed}"
        else:
            speed_filter = f"asetrate=44100*{speed},aresample=44100"

        ffmpeg_command = [
            "ffmpeg", "-i", mp3_path,
            "-filter:a", speed_filter,
            "-f", _FFMPEG_OUTPUT_FORMATS.get(response_format, response_format), "-y",
            converted_path,
        ]

        try:
            subprocess.run(ffmpeg_command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing ffmpeg binary.
            _remove_quietly(converted_path)
            raise RuntimeError(f"Error in audio conversion: {e}") from e
    finally:
        # The mp3 was only an intermediate once a conversion is attempted.
        _remove_quietly(mp3_path)

    return converted_path
53
+
54
def generate_speech(text, voice, response_format, speed=1.0):
    """Blocking wrapper that runs ``_generate_audio`` and returns its result."""
    audio_job = _generate_audio(text, voice, response_format, speed)
    return asyncio.run(audio_job)
56
+
57
def get_models():
    """Return the fixed list of model descriptors, each with ``id`` and ``name``."""
    catalogue = (
        ("tts-1", "Text-to-speech v1"),
        ("tts-1-hd", "Text-to-speech v1 HD"),
    )
    return [{"id": model_id, "name": label} for model_id, label in catalogue]
62
+
63
async def _get_voices(language=None):
    """Fetch the edge-tts voice list, filtered by locale.

    Args:
        language: A locale to match exactly against each voice's ``Locale``,
            ``'all'`` for no filtering, or a falsy value to use the
            configured default language.

    Returns:
        A list of dicts with ``name``, ``gender`` and ``language`` keys.
    """
    all_voices = await edge_tts.list_voices()
    # Read the environment at call time: the module-level DEFAULT_LANGUAGE is
    # evaluated on import, which can happen before any load_dotenv() call, so
    # it is only used as the last-resort fallback here.
    language = language or os.getenv('DEFAULT_LANGUAGE', DEFAULT_LANGUAGE)
    return [
        {"name": v['ShortName'], "gender": v['Gender'], "language": v['Locale']}
        for v in all_voices
        if language == 'all' or v['Locale'] == language
    ]
72
+
73
def get_voices(language=None):
    """Blocking wrapper that runs ``_get_voices`` and returns its result."""
    voices_job = _get_voices(language)
    return asyncio.run(voices_job)
src/openai-edge-tts/app/utils.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils.py
2
+
3
+ from flask import request, jsonify
4
+ from functools import wraps
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+
10
def getenv_bool(name: str, default: bool = False) -> bool:
    """Read environment variable *name* as a boolean.

    Args:
        name: Name of the environment variable.
        default: Value used when the variable is not set.

    Returns:
        True when the value, ignoring case and surrounding whitespace, is one
        of ``yes``, ``y``, ``true``, ``1`` or ``t``; False otherwise.
    """
    # strip() so that a value such as "True " or "true\r" still counts as true.
    raw_value = os.getenv(name, str(default))
    return raw_value.strip().lower() in ("yes", "y", "true", "1", "t")


# Key that clients must present as a Bearer token.
API_KEY = os.getenv('API_KEY', 'your_api_key_here')
# When false, require_api_key lets every request through unchecked.
REQUIRE_API_KEY = getenv_bool('REQUIRE_API_KEY', True)
15
+
16
def require_api_key(f):
    """Decorate a Flask view so it requires ``Authorization: Bearer <API_KEY>``.

    When ``REQUIRE_API_KEY`` is false the view is called without any check.
    Otherwise a missing or non-Bearer header, or a token that differs from
    ``API_KEY``, produces a JSON error with status 401.
    """
    import hmac  # local import: only needed by this decorator

    bearer_prefix = 'Bearer '

    @wraps(f)
    def decorated_function(*args, **kwargs):
        if not REQUIRE_API_KEY:
            return f(*args, **kwargs)
        auth_header = request.headers.get('Authorization')
        if not auth_header or not auth_header.startswith(bearer_prefix):
            return jsonify({"error": "Missing or invalid API key"}), 401
        # Take everything after the prefix, so a token that itself contains
        # "Bearer " is not cut short.
        token = auth_header[len(bearer_prefix):]
        # Constant-time comparison; compare bytes because compare_digest
        # rejects non-ASCII str arguments.
        if not hmac.compare_digest(token.encode('utf-8'), API_KEY.encode('utf-8')):
            return jsonify({"error": "Invalid API key"}), 401
        return f(*args, **kwargs)
    return decorated_function
29
+
30
# MIME type served for each supported audio response format.
AUDIO_FORMAT_MIME_TYPES = dict(
    mp3="audio/mpeg",
    opus="audio/ogg",
    aac="audio/aac",
    flac="audio/flac",
    wav="audio/wav",
    pcm="audio/L16",
)
src/openai-edge-tts/requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ flask
2
+ gevent
3
+ python-dotenv
4
+ edge-tts
5
+ art
src/startup.sh CHANGED
@@ -1,11 +1,22 @@
1
  #!/bin/sh
2
 
3
- # 使用 PM2 启动 Remix 应用,并传递 PORT 环境变量
4
- cd ${HOMEDIR}/${REMIX_NAME}
5
- pm2 start ecosystem.config.cjs
6
- pm2 save
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  # 只输出日志
9
- pm2 logs my-remix-app --lines 50
10
 
11
  cd ${HOMEDIR}
 
1
  #!/bin/sh
2
 
3
# Start the Remix app under PM2 only when REMIX_INSTALL is "true".
if [ "$REMIX_INSTALL" = true ]; then
    # ecosystem.config.cjs is given as a relative path, so start from the
    # Remix app directory.
    cd "${HOMEDIR}/${REMIX_NAME}"
    pm2 start ecosystem.config.cjs
    pm2 save
fi

# SSHX_INSTALL: nothing is started for sshx at this point in the script.

# Start the openai-edge-tts server under PM2 only when
# OPENAI_EDGE_TTS_INSTALL is "true".
if [ "$OPENAI_EDGE_TTS_INSTALL" = true ]; then
    # Pass the script as the single pm2 target and select Python through
    # --interpreter; "pm2 start python <script>" hands pm2 two targets.
    pm2 start "${HOMEDIR}/openai-edge-tts/app/server.py" --interpreter python --name openai-edge-tts
    pm2 save
fi
18
 
19
  # 只输出日志
20
+ pm2 logs --lines 50
21
 
22
  cd ${HOMEDIR}