getapi committed on
Commit
7507bd6
·
verified ·
1 Parent(s): 2c73f4b

Create app.ts

Browse files
Files changed (1) hide show
  1. app.ts +373 -0
app.ts ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {serve} from "https://deno.land/std/http/server.ts";
2
+ import {EdgeSpeechTTS} from "https://esm.sh/@lobehub/tts@1";
3
+
4
/** Short model aliases accepted by the API, mapped to Edge TTS voice names. */
const VOICE_ALIASES: ReadonlyMap<string, string> = new Map([
  ["ava", "en-US-AvaMultilingualNeural"],
  ["andrew", "en-US-AndrewMultilingualNeural"],
  ["emma", "en-US-EmmaMultilingualNeural"],
  ["brian", "en-US-BrianMultilingualNeural"],
  ["vivienne", "fr-FR-VivienneMultilingualNeural"],
  ["remy", "fr-FR-RemyMultilingualNeural"],
  ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
  ["florian", "de-DE-FlorianMultilingualNeural"],
  ["dmitry", "ru-RU-DmitryNeural"],
  ["svetlana", "ru-RU-SvetlanaNeural"],
]);

/** Voice used when the requested model is missing or is not a known alias. */
const DEFAULT_VOICE_NAME = "en-US-BrianMultilingualNeural";

/** Maps a requested model (alias or full "...Neural" voice name) to a voice name. */
function resolveVoiceName(model: unknown): string {
  if (typeof model !== "string") return DEFAULT_VOICE_NAME;
  // Anything containing "Neural" is treated as a full voice name and passed through.
  if (model.includes("Neural")) return model;
  return VOICE_ALIASES.get(model) ?? DEFAULT_VOICE_NAME;
}

/** Parses a "rate:<n>|pitch:<n>" string; missing or non-numeric values become 0. */
function parseVoiceParams(voice: unknown): { rate: number; pitch: number } {
  const params = new Map<string, string>();
  if (typeof voice === "string") {
    for (const part of voice.split("|")) {
      const [key, value] = part.split(":");
      if (key && value !== undefined) params.set(key.trim(), value.trim());
    }
  }
  const toNumber = (raw: string | undefined): number => {
    const parsed = Number(raw || 0);
    // Never forward NaN/Infinity to the TTS engine.
    return Number.isFinite(parsed) ? parsed : 0;
  };
  return { rate: toNumber(params.get("rate")), pitch: toNumber(params.get("pitch")) };
}

/**
 * Synthesizes `text` with EdgeSpeechTTS and returns the result as an MP3 response.
 *
 * @param model Either a short alias ("brian", "ava", ...) or a full voice name
 *   containing "Neural". Unknown or missing values fall back to the default voice.
 * @param voice Prosody settings formatted as "rate:<number>|pitch:<number>".
 *   Missing, malformed, or non-numeric values are treated as 0. The settings are
 *   applied to alias models as well as to full voice names.
 * @param text The text to speak.
 * @returns A response with `Content-Type: audio/mpeg` whose body is the audio
 *   returned by the TTS engine.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
  const voiceName = resolveVoiceName(model);
  const { rate, pitch } = parseVoiceParams(voice);

  const tts = new EdgeSpeechTTS();
  const payload = {
    input: text,
    options: { rate, pitch, voice: voiceName },
  };

  const response = await tts.create(payload);
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());
  return new Response(mp3Buffer, {
    headers: { "Content-Type": "audio/mpeg" },
  });
}
65
+
66
/**
 * Checks that the request's Content-Type media type matches `expected`.
 *
 * Only the media type is compared: parameters such as `; charset=utf-8` are
 * ignored and the comparison is case-insensitive, so
 * `Application/JSON; charset=utf-8` matches `application/json`.
 *
 * @param req The incoming request.
 * @param expected The required media type, e.g. "application/json".
 * @returns `undefined` when the header matches, otherwise a 400 response that
 *   the caller should return as-is.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  const mediaType = contentType?.split(";")[0]?.trim().toLowerCase();
  if (mediaType === expected.trim().toLowerCase()) {
    return undefined;
  }
  console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
  return new Response("Bad Request", { status: 400 });
}
73
+
74
/**
 * Debug endpoint: synthesizes a fixed Russian test phrase with a fixed voice
 * and zero rate/pitch, logging the parameters before delegating to
 * `synthesizeSpeech`.
 */
async function handleDebugRequest() {
  const sample = {
    model: "en-US-BrianMultilingualNeural",
    voice: "rate:0.0|pitch:0.0",
    text: "Приветик! Надеюсь ты меня хорошо слышишь? Алё?!",
  };
  console.log(`model=${sample.model}, voice=${sample.voice}, text=${sample.text}`);
  return synthesizeSpeech(sample.model, sample.voice, sample.text);
}
81
+
82
/**
 * Handles the speech endpoint: validates the request and delegates to
 * `synthesizeSpeech`.
 *
 * The body must be a JSON object with a non-empty string `input`. `model` and
 * `voice` are optional; when absent or not strings they are passed on as empty
 * strings.
 *
 * @param req The incoming request.
 * @returns 405 for non-POST methods, 400 for a wrong Content-Type, malformed
 *   JSON, a non-object body or a missing/blank `input`; otherwise whatever
 *   `synthesizeSpeech` returns.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
  if (req.method !== "POST") {
    return new Response("Method Not Allowed", { status: 405, headers: { Allow: "POST" } });
  }

  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  let body: unknown;
  try {
    body = await req.json();
  } catch {
    // Malformed JSON is a client error, not a server failure.
    return new Response("Bad Request", { status: 400 });
  }
  if (typeof body !== "object" || body === null) {
    return new Response("Bad Request", { status: 400 });
  }

  const { model, input, voice } = body as Record<string, unknown>;
  if (typeof input !== "string" || input.trim() === "") {
    return new Response("Bad Request", { status: 400 });
  }

  return synthesizeSpeech(
    typeof model === "string" ? model : "",
    typeof voice === "string" ? voice : "",
    input,
  );
}
91
+
92
+
93
/**
 * Static HTML for the demo page served at "/".
 *
 * The embedded client script deliberately avoids template literals so that this
 * outer template literal needs no backtick or placeholder escaping. The only
 * escape is the line-continuation separator used for the curl example, which
 * the browser reads as space + backslash + newline.
 */
const DEMO_PAGE_HTML = `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta content="width=device-width, initial-scale=1.0" name="viewport" />
  <title>tts</title>
  <style>
    body {
      background-color: #121212;
      color: #e0e0e0;
      font-family: Arial, sans-serif;
      margin: 0;
      padding: 20px;
    }

    .container {
      max-width: 800px;
      margin: 0 auto;
      padding: 20px;
      background-color: #1e1e1e;
      border-radius: 8px;
      box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
    }

    .input-area, .output-area {
      margin-bottom: 20px;
    }

    .slider-container, .textarea-container, .dropdown-container {
      margin-bottom: 20px;
    }

    label {
      display: block;
      margin-bottom: 8px;
      font-weight: bold;
    }

    input[type="range"] {
      width: 100%;
    }

    .slider-value {
      text-align: center;
      margin-top: 8px;
    }

    textarea {
      max-width: 780px;
      width: calc(100% - 20px);
      height: 100px;
      padding: 10px;
      border: 1px solid #333;
      border-radius: 4px;
      background-color: #2e2e2e;
      color: #e0e0e0;
      resize: none;
    }

    select {
      width: 100%;
      padding: 10px;
      border: 1px solid #333;
      border-radius: 4px;
      background-color: #2e2e2e;
      color: #e0e0e0;
    }

    button {
      width: 100%;
      padding: 10px;
      border: none;
      border-radius: 4px;
      background-color: #6200ea;
      color: #fff;
      font-size: 16px;
      cursor: pointer;
      transition: background-color 0.3s;
    }

    button:hover {
      background-color: #3700b3;
    }

    h1 {
      font-size: 24px;
      margin-bottom: 20px;
    }

    a {
      color: #bb86fc;
      text-decoration: none;
      display: block;
      margin: 10px 0;
    }

    a:hover {
      text-decoration: underline;
    }

    #audioPlayerContainer {
      text-align: center;
    }

    audio {
      width: 100%;
      max-width: 600px;
      margin: 10px 0;
    }

    pre {
      color: #94c890;
      background: #000000;
      padding: 5px 10px;
      margin: 0;
      font-size: 1.12em;
    }
  </style>
</head>
<body>
  <div class="container">
    <div class="input-area">
      <div class="textarea-container">
        <label for="inputText">текст:</label>
        <textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
      </div>
      <div class="dropdown-container">
        <label for="voiceSelect">выберите голос:</label>
        <select id="voiceSelect"></select>
      </div>
      <button id="synthesizeButton">синтезировать</button>
    </div>
    <div class="output-area">
      <div id="audioPlayerContainer"></div>
    </div>
    <details>
      <summary>api</summary>
      <p>получить список голосов:</p>
      <pre id="apiVoices"></pre>
      <p>post-запрос для синтеза голоса из текста:</p>
      <pre id="apiExamples"></pre>
    </details>
  </div>
  <script>
    let audio = null;
    let audioUrl = null;

    const synthesizeButton = document.getElementById('synthesizeButton');
    const audioPlayerContainer = document.getElementById('audioPlayerContainer');

    synthesizeButton.addEventListener('click', async () => {
      const text = document.getElementById('inputText').value || 'приветик! давай поболтаем немного?';
      const rate = '0.0';
      const pitch = '0.0';
      const voice = 'rate:' + rate + '|pitch:' + pitch;
      const model = document.getElementById('voiceSelect').value;

      if (audio) {
        audio.pause();
        audio.currentTime = 0;
      }

      // Block repeated clicks while a request is in flight.
      synthesizeButton.disabled = true;
      try {
        const response = await fetch('/v1/audio/speech', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ model, input: text, voice })
        });
        if (!response.ok) {
          throw new Error('HTTP ' + response.status);
        }
        const blob = await response.blob();

        // Release the previous clip before creating a new one.
        if (audioUrl) {
          URL.revokeObjectURL(audioUrl);
        }
        audioPlayerContainer.innerHTML = '';

        audioUrl = URL.createObjectURL(blob);
        audio = new Audio(audioUrl);
        audio.controls = true;
        audioPlayerContainer.appendChild(audio);

        const downloadLink = document.createElement('a');
        downloadLink.href = audioUrl;
        downloadLink.download = 'synthesized_voice.mp3';
        downloadLink.textContent = 'Скачать аудио';
        downloadLink.style.display = 'block';
        downloadLink.style.marginTop = '10px';
        audioPlayerContainer.appendChild(downloadLink);

        await audio.play();
      } catch (error) {
        console.error('ошибка при синтезе речи:', error);
      } finally {
        synthesizeButton.disabled = false;
      }
    });

    async function fetchModels() {
      const defaultModel = 'brian';
      try {
        const response = await fetch('/v1/audio/models');
        if (!response.ok) {
          throw new Error('HTTP ' + response.status);
        }
        const models = await response.json();
        const voiceSelect = document.getElementById('voiceSelect');

        models.forEach((model) => {
          const option = document.createElement('option');
          option.value = model.model;
          option.textContent = model.model;
          option.selected = model.model === defaultModel;
          voiceSelect.appendChild(option);
        });
      } catch (error) {
        console.error('ошибка при получении списка моделей:', error);
      }
    }
    fetchModels();

    function createApiExamples() {
      const currentUrl = window.location.origin;
      const payload = JSON.stringify({ model: 'brian', input: 'привет! хрю-хрю!', voice: 'rate:0|pitch:0' });
      const exampleLines = [
        "curl '" + currentUrl + "/v1/audio/speech'",
        "-H 'content-type: application/json'",
        "--data-raw '" + payload + "'",
        "-o tts_voice.mp3"
      ];
      document.getElementById('apiVoices').textContent = 'curl ' + currentUrl + '/v1/audio/models';
      document.getElementById('apiExamples').textContent = exampleLines.join(' \\\\\\n  ');
    }
    createApiExamples();
  </script>
</body>
</html>`;

/**
 * Serves the demo page: a text box, a voice selector filled from
 * `/v1/audio/models`, a synthesize button that posts to `/v1/audio/speech`,
 * and curl examples built from the page's own origin.
 *
 * @param req The incoming request; not inspected, kept for handler-signature
 *   parity with the other route handlers.
 * @returns A 200 response containing the HTML page, declared as UTF-8.
 */
async function handleDemoRequest(req: Request): Promise<Response> {
  return new Response(DEMO_PAGE_HTML, {
    headers: { "Content-Type": "text/html; charset=utf-8" },
  });
}
328
+
329
/**
 * Returns the supported voice aliases as a JSON array of `{model, gender}`
 * objects, male voices first and female voices after, each group keeping its
 * declaration order.
 */
async function handleVoiceList() {
  const catalogue = [
    {model: 'ava', gender: 'female'},
    {model: 'andrew', gender: 'male'},
    {model: 'emma', gender: 'female'},
    {model: 'brian', gender: 'male'},
    {model: 'vivienne', gender: 'female'},
    {model: 'remy', gender: 'male'},
    {model: 'seraphina', gender: 'female'},
    {model: 'florian', gender: 'male'},
    {model: 'dmitry', gender: 'male'},
    {model: 'svetlana', gender: 'female'},
  ];

  // Every entry is either male or female, so this partition yields the same
  // order as a stable sort that ranks male before female.
  const withGender = (wanted: string) => catalogue.filter((entry) => entry.gender === wanted);
  const ordered = [...withGender('male'), ...withGender('female')];

  const payload = JSON.stringify(ordered);
  return new Response(payload, {
    headers: {"Content-Type": "application/json"},
  });
}
345
+
346
+
347
/**
 * Dispatches a request to the handler for its path.
 *
 * Routes: "/" (demo page), "/v1/audio/models" (voice list), "/tts" (debug
 * synthesis) and "/v1/audio/speech" (synthesis API). Any other path is logged
 * and answered with 404.
 */
async function routeRequest(req: Request): Promise<Response> {
  const url = new URL(req.url);

  // Each handler is awaited so that a rejection surfaces here and is caught by
  // the caller's try/catch instead of escaping as an unhandled rejection.
  switch (url.pathname) {
    case "/":
      return await handleDemoRequest(req);
    case "/v1/audio/models":
      return await handleVoiceList();
    case "/tts":
      return await handleDebugRequest();
    case "/v1/audio/speech":
      return await handleSynthesisRequest(req);
    default:
      console.log(`Unhandled path ${url.pathname}`);
      return new Response("Not Found", {status: 404});
  }
}

// Entry point: start the HTTP server and turn any handler failure into a 500.
serve(async (req: Request) => {
  try {
    return await routeRequest(req);
  } catch (err: unknown) {
    // Anything can be thrown, so narrow before reading `.message`.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
});