Remsky committed on
Commit
1b3eebf
·
1 Parent(s): d259ceb

Update TTSModel to load modules and model files from v0.19 directory

Browse files
Files changed (2) hide show
  1. deprecated copy.py +0 -435
  2. tts_model.py +3 -3
deprecated copy.py DELETED
@@ -1,435 +0,0 @@
1
- # import os
2
- # import gradio as gr
3
- # import time
4
- # import math
5
- # import logging
6
- # import matplotlib.pyplot as plt
7
- # import numpy as np
8
- # # from lib.mock_tts import MockTTSModel
9
- # from lib import format_audio_output
10
- # from lib.ui_content import header_html, demo_text_info
11
- # from lib.book_utils import get_available_books, get_book_info, get_chapter_text
12
- # from lib.text_utils import count_tokens
13
- # from tts_model import TTSModel
14
-
15
- # # Set HF_HOME for faster restarts with cached models/voices
16
- # os.environ["HF_HOME"] = "/data/.huggingface"
17
-
18
- # # Create TTS model instance
19
- # model = TTSModel()
20
-
21
- # # Configure logging
22
- # logging.basicConfig(level=logging.DEBUG)
23
- # # Suppress matplotlib debug messages
24
- # logging.getLogger('matplotlib').setLevel(logging.WARNING)
25
- # logger = logging.getLogger(__name__)
26
- # logger.debug("Starting app initialization...")
27
-
28
-
29
- # model = TTSModel()
30
-
31
- # def initialize_model():
32
- # """Initialize model and get voices"""
33
- # if model.model is None:
34
- # if not model.initialize():
35
- # raise gr.Error("Failed to initialize model")
36
-
37
- # voices = model.list_voices()
38
- # if not voices:
39
- # raise gr.Error("No voices found. Please check the voices directory.")
40
-
41
- # default_voice = 'af_sky' if 'af_sky' in voices else voices[0] if voices else None
42
-
43
- # return gr.update(choices=voices, value=default_voice)
44
-
45
- # def update_progress(chunk_num, total_chunks, tokens_per_sec, rtf, progress_state, start_time, gpu_timeout, progress):
46
- # # Calculate time metrics
47
- # elapsed = time.time() - start_time
48
- # gpu_time_left = max(0, gpu_timeout - elapsed)
49
-
50
- # # Calculate chunk time more accurately
51
- # prev_total_time = sum(progress_state["chunk_times"]) if progress_state["chunk_times"] else 0
52
- # chunk_time = elapsed - prev_total_time
53
-
54
- # # Validate metrics before adding to state
55
- # if chunk_time > 0 and tokens_per_sec >= 0:
56
- # # Update progress state with validated metrics
57
- # progress_state["progress"] = chunk_num / total_chunks
58
- # progress_state["total_chunks"] = total_chunks
59
- # progress_state["gpu_time_left"] = gpu_time_left
60
- # progress_state["tokens_per_sec"].append(float(tokens_per_sec))
61
- # progress_state["rtf"].append(float(rtf))
62
- # progress_state["chunk_times"].append(chunk_time)
63
-
64
- # # Only update progress display during processing
65
- # progress(progress_state["progress"], desc=f"Processing chunk {chunk_num}/{total_chunks} | GPU Time Left: {int(gpu_time_left)}s")
66
-
67
- # def generate_speech_from_ui(text, voice_names, speed, gpu_timeout, progress=gr.Progress(track_tqdm=False)):
68
- # """Handle text-to-speech generation from the Gradio UI"""
69
- # try:
70
- # if not text or not voice_names:
71
- # raise gr.Error("Please enter text and select at least one voice")
72
-
73
- # start_time = time.time()
74
-
75
- # # Create progress state with explicit type initialization
76
- # progress_state = {
77
- # "progress": 0.0,
78
- # "tokens_per_sec": [], # Initialize as empty list
79
- # "rtf": [], # Initialize as empty list
80
- # "chunk_times": [], # Initialize as empty list
81
- # "gpu_time_left": float(gpu_timeout), # Ensure float
82
- # "total_chunks": 0
83
- # }
84
-
85
- # # Handle single or multiple voices
86
- # if isinstance(voice_names, str):
87
- # voice_names = [voice_names]
88
-
89
- # # Generate speech with progress tracking using combined voice
90
- # audio_array, duration, metrics = model.generate_speech(
91
- # text,
92
- # voice_names,
93
- # speed,
94
- # gpu_timeout=gpu_timeout,
95
- # progress_callback=update_progress,
96
- # progress_state=progress_state,
97
- # progress=progress
98
- # )
99
-
100
- # # Format output for Gradio
101
- # audio_output, duration_text = format_audio_output(audio_array)
102
-
103
- # # Create plot and metrics text outside GPU context
104
- # fig, metrics_text = create_performance_plot(metrics, voice_names)
105
-
106
- # return (
107
- # audio_output,
108
- # fig,
109
- # metrics_text
110
- # )
111
- # except Exception as e:
112
- # raise gr.Error(f"Generation failed: {str(e)}")
113
-
114
- # def create_performance_plot(metrics, voice_names):
115
- # """Create performance plot and metrics text from generation metrics"""
116
- # # Clean and process the data
117
- # tokens_per_sec = np.array(metrics["tokens_per_sec"])
118
- # rtf_values = np.array(metrics["rtf"])
119
-
120
- # # Calculate statistics using cleaned data
121
- # median_tps = float(np.median(tokens_per_sec))
122
- # mean_tps = float(np.mean(tokens_per_sec))
123
- # std_tps = float(np.std(tokens_per_sec))
124
-
125
- # # Set y-axis limits based on data range
126
- # y_min = max(0, np.min(tokens_per_sec) * 0.9)
127
- # y_max = np.max(tokens_per_sec) * 1.1
128
-
129
- # # Create plot
130
- # fig, ax = plt.subplots(figsize=(10, 5))
131
- # fig.patch.set_facecolor('black')
132
- # ax.set_facecolor('black')
133
-
134
- # # Plot data points
135
- # chunk_nums = list(range(1, len(tokens_per_sec) + 1))
136
-
137
- # # Plot data points
138
- # ax.bar(chunk_nums, tokens_per_sec, color='#ff2a6d', alpha=0.6)
139
-
140
- # # Set y-axis limits with padding
141
- # padding = 0.1 * (y_max - y_min)
142
- # ax.set_ylim(max(0, y_min - padding), y_max + padding)
143
-
144
- # # Add median line
145
- # ax.axhline(y=median_tps, color='#05d9e8', linestyle='--',
146
- # label=f'Median: {median_tps:.1f} tokens/sec')
147
-
148
- # # Style improvements
149
- # ax.set_xlabel('Chunk Number', fontsize=24, labelpad=20, color='white')
150
- # ax.set_ylabel('Tokens per Second', fontsize=24, labelpad=20, color='white')
151
- # ax.set_title('Processing Speed by Chunk', fontsize=28, pad=30, color='white')
152
- # ax.tick_params(axis='both', which='major', labelsize=20, colors='white')
153
- # ax.spines['bottom'].set_color('white')
154
- # ax.spines['top'].set_color('white')
155
- # ax.spines['left'].set_color('white')
156
- # ax.spines['right'].set_color('white')
157
- # ax.grid(False)
158
- # ax.legend(fontsize=20, facecolor='black', edgecolor='#05d9e8', loc='lower left',
159
- # labelcolor='white')
160
-
161
- # plt.tight_layout()
162
-
163
- # # Calculate average RTF from individual chunk RTFs
164
- # rtf = np.mean(rtf_values)
165
-
166
- # # Prepare metrics text
167
- # metrics_text = (
168
- # f"Median Speed: {median_tps:.1f} tokens/sec (o200k_base)\n" +
169
- # f"Real-time Factor: {rtf:.3f}\n" +
170
- # f"Real Time Speed: {int(1/rtf)}x\n" +
171
- # f"Processing Time: {int(metrics['total_time'])}s\n" +
172
- # f"Total Tokens: {metrics['total_tokens']} (o200k_base)\n" +
173
- # f"Voices: {', '.join(voice_names)}"
174
- # )
175
-
176
- # return fig, metrics_text
177
-
178
-
179
- # # Create Gradio interface
180
- # with gr.Blocks(title="Kokoro TTS Demo", css="""
181
- # .equal-height {
182
- # min-height: 400px;
183
- # display: flex;
184
- # flex-direction: column;
185
- # }
186
- # .token-label {
187
- # font-size: 1rem;
188
- # margin-bottom: 0.3rem;
189
- # text-align: center;
190
- # padding: 0.2rem 0;
191
- # }
192
- # .token-count {
193
- # color: #4169e1;
194
- # }
195
- # """) as demo:
196
- # gr.HTML(header_html)
197
-
198
- # with gr.Row():
199
- # # Column 1: Text Input and Book Selection
200
- # with gr.Column(elem_classes="equal-height"):
201
- # # Book selection
202
- # books = get_available_books()
203
- # book_dropdown = gr.Dropdown(
204
- # label="Select Book",
205
- # choices=[book['label'] for book in books],
206
- # value=books[0]['label'] if books else None,
207
- # type="value",
208
- # allow_custom_value=True
209
- # )
210
-
211
- # # Initialize chapters for first book
212
- # initial_book = books[0]['value'] if books else None
213
- # initial_chapters = []
214
- # if initial_book:
215
- # book_path = os.path.join("texts/processed", initial_book)
216
- # _, chapters = get_book_info(book_path)
217
- # initial_chapters = [ch['title'] for ch in chapters]
218
-
219
- # # Chapter selection with initial chapters
220
- # chapter_dropdown = gr.Dropdown(
221
- # label="Select Chapter",
222
- # choices=initial_chapters,
223
- # value=initial_chapters[0] if initial_chapters else None,
224
- # type="value",
225
- # allow_custom_value=True
226
- # )
227
- # lab_tps = 175
228
- # lab_rts = 50
229
- # # Text input area with initial chapter text
230
- # initial_text = ""
231
- # if initial_chapters and initial_book:
232
- # book_path = os.path.join("texts/processed", initial_book)
233
- # _, chapters = get_book_info(book_path)
234
- # if chapters:
235
- # initial_text = get_chapter_text(book_path, chapters[0]['id'])
236
- # tokens = count_tokens(initial_text)
237
- # time_estimate = math.ceil(tokens / lab_tps)
238
- # output_estimate = (time_estimate * lab_rts)//60
239
- # initial_label = f'<div class="token-label"><span class="token-count">Estimated {output_estimate} minutes in ~{time_estimate}s</span></div>'
240
- # else:
241
- # initial_label = '<div class="token-label"></div>'
242
- # else:
243
- # initial_label = '<div class="token-label"></div>'
244
-
245
- # def update_text_label(text):
246
- # if not text:
247
- # return '<div class="token-label"></div>'
248
- # tokens = count_tokens(text)
249
- # time_estimate = math.ceil(tokens / lab_tps)
250
- # output_estimate = (time_estimate * lab_rts)//60
251
- # return f'<div class="token-label"><span class="token-count">Estimated {output_estimate} minutes in ~{time_estimate}s</span></div>'
252
-
253
-
254
- # text_input = gr.TextArea(
255
- # label=None,
256
- # placeholder="Enter text here, select a chapter, or upload a .txt file",
257
- # value=initial_text,
258
- # lines=8,
259
- # max_lines=14,
260
- # show_label=False,
261
- # show_copy_button=True # Add copy button for convenience
262
- # )
263
-
264
- # clear_btn = gr.Button("Clear Text", variant="secondary")
265
- # label_html = gr.HTML(initial_label)
266
-
267
- # def clear_text():
268
- # return "", '<div class="token-label"></div>'
269
-
270
- # clear_btn.click(
271
- # fn=clear_text,
272
- # outputs=[text_input, label_html]
273
- # )
274
-
275
- # # Update label whenever text changes
276
- # text_input.change(
277
- # fn=update_text_label,
278
- # inputs=[text_input],
279
- # outputs=[label_html],
280
- # trigger_mode="always_last"
281
- # )
282
-
283
- # def update_chapters(book_name):
284
- # if not book_name:
285
- # return gr.update(choices=[], value=None), "", '<div class="token-label"></div>'
286
- # # Find the corresponding book file
287
- # book_file = next((book['value'] for book in books if book['label'] == book_name), None)
288
- # if not book_file:
289
- # return gr.update(choices=[], value=None), "", '<div class="token-label"></div>'
290
- # book_path = os.path.join("texts/processed", book_file)
291
- # book_title, chapters = get_book_info(book_path)
292
- # # Create simple choices list of chapter titles
293
- # chapter_choices = [ch['title'] for ch in chapters]
294
- # # Set initial chapter text when book is selected
295
- # initial_text = get_chapter_text(book_path, chapters[0]['id']) if chapters else ""
296
- # if initial_text:
297
- # tokens = count_tokens(initial_text)
298
- # time_estimate = math.ceil(tokens / 150 / 10) * 10
299
- # label = f'<div class="token-label"><span class="token-count">({tokens} tokens, ~{time_estimate}s generation time)</span></div>'
300
- # else:
301
- # label = '<div class="token-label"></div>'
302
- # return gr.update(choices=chapter_choices, value=chapter_choices[0] if chapter_choices else None), initial_text, label
303
-
304
- # def load_chapter_text(book_name, chapter_title):
305
- # if not book_name or not chapter_title:
306
- # return "", '<div class="token-label"></div>'
307
- # # Find the corresponding book file
308
- # book_file = next((book['value'] for book in books if book['label'] == book_name), None)
309
- # if not book_file:
310
- # return "", '<div class="token-label"></div>'
311
- # book_path = os.path.join("texts/processed", book_file)
312
- # # Get all chapters and find the one matching the title
313
- # _, chapters = get_book_info(book_path)
314
- # for ch in chapters:
315
- # if ch['title'] == chapter_title:
316
- # text = get_chapter_text(book_path, ch['id'])
317
- # tokens = count_tokens(text)
318
- # time_estimate = math.ceil(tokens / 150 / 10) * 10
319
- # return text, f'<div class="token-label"> <span class="token-count">({tokens} tokens, ~{time_estimate}s generation time)</span></div>'
320
- # return "", '<div class="token-label"></div>'
321
-
322
- # # Set up event handlers for book/chapter selection
323
- # book_dropdown.change(
324
- # fn=update_chapters,
325
- # inputs=[book_dropdown],
326
- # outputs=[chapter_dropdown, text_input, label_html]
327
- # )
328
-
329
- # chapter_dropdown.change(
330
- # fn=load_chapter_text,
331
- # inputs=[book_dropdown, chapter_dropdown],
332
- # outputs=[text_input, label_html]
333
- # )
334
-
335
- # # Column 2: Controls
336
- # with gr.Column(elem_classes="equal-height"):
337
- # file_input = gr.File(
338
- # label="Upload .txt file",
339
- # file_types=[".txt"],
340
- # type="binary"
341
- # )
342
-
343
- # def load_text_from_file(file_bytes):
344
- # if file_bytes is None:
345
- # return None, '<div class="token-label"></div>'
346
- # try:
347
- # text = file_bytes.decode('utf-8')
348
- # tokens = count_tokens(text)
349
- # time_estimate = math.ceil(tokens / 150 / 10) * 10 # Round up to nearest 10 seconds
350
- # return text, f'<div class="token-label"><span class="token-count">({tokens} tokens, ~{time_estimate}s generation time)</span></div>'
351
- # except Exception as e:
352
- # raise gr.Error(f"Failed to read file: {str(e)}")
353
-
354
- # file_input.change(
355
- # fn=load_text_from_file,
356
- # inputs=[file_input],
357
- # outputs=[text_input, label_html]
358
- # )
359
-
360
- # with gr.Group():
361
- # voice_dropdown = gr.Dropdown(
362
- # label="Voice(s)",
363
- # choices=[], # Start empty, will be populated after initialization
364
- # value=None,
365
- # allow_custom_value=True,
366
- # multiselect=True
367
- # )
368
-
369
- # # Add refresh button to manually update voice list
370
- # refresh_btn = gr.Button("🔄 Refresh Voices", size="sm")
371
-
372
- # speed_slider = gr.Slider(
373
- # label="Speed",
374
- # minimum=0.5,
375
- # maximum=2.0,
376
- # value=1.0,
377
- # step=0.1
378
- # )
379
- # gpu_timeout_slider = gr.Slider(
380
- # label="GPU Timeout (seconds)",
381
- # minimum=15,
382
- # maximum=120,
383
- # value=90,
384
- # step=1,
385
- # info="Maximum time allowed for GPU processing"
386
- # )
387
- # submit_btn = gr.Button("Generate Speech", variant="primary")
388
-
389
- # # Column 3: Output
390
- # with gr.Column(elem_classes="equal-height"):
391
- # audio_output = gr.Audio(
392
- # label="Generated Speech",
393
- # type="numpy",
394
- # format="wav",
395
- # autoplay=False
396
- # )
397
- # progress_bar = gr.Progress(track_tqdm=False)
398
- # metrics_text = gr.Textbox(
399
- # label="Performance Summary",
400
- # interactive=False,
401
- # lines=5
402
- # )
403
- # metrics_plot = gr.Plot(
404
- # label="Processing Metrics",
405
- # show_label=True,
406
- # format="png" # Explicitly set format to PNG which is supported by matplotlib
407
- # )
408
-
409
- # # Set up event handlers
410
- # refresh_btn.click(
411
- # fn=initialize_model,
412
- # outputs=[voice_dropdown]
413
- # )
414
-
415
- # submit_btn.click(
416
- # fn=generate_speech_from_ui,
417
- # inputs=[text_input, voice_dropdown, speed_slider, gpu_timeout_slider],
418
- # outputs=[audio_output, metrics_plot, metrics_text],
419
- # show_progress=True
420
- # )
421
-
422
- # # Add text analysis info
423
- # with gr.Row():
424
- # with gr.Column():
425
- # gr.Markdown(demo_text_info)
426
-
427
- # # Initialize voices on load
428
- # demo.load(
429
- # fn=initialize_model,
430
- # outputs=[voice_dropdown]
431
- # )
432
-
433
- # # Launch the app
434
- # if __name__ == "__main__":
435
- # demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tts_model.py CHANGED
@@ -28,9 +28,9 @@ class TTSModel:
28
  ensure_dir(self.voices_dir)
29
  self.model_path = None
30
 
31
- # Load required modules
32
  py_modules = ["istftnet", "plbert", "models", "kokoro"]
33
- module_files = download_model_files(self.model_repo, [f"{m}.py" for m in py_modules])
34
 
35
  for module_name, file_path in zip(py_modules, module_files):
36
  load_module_from_file(module_name, file_path)
@@ -48,7 +48,7 @@ class TTSModel:
48
  # Download model files
49
  model_files = download_model_files(
50
  self.model_repo,
51
- ["kokoro-v0_19.pth", "config.json"]
52
  )
53
  self.model_path = model_files[0] # kokoro-v0_19.pth
54
 
 
28
  ensure_dir(self.voices_dir)
29
  self.model_path = None
30
 
31
+ # Load required modules from v0.19 directory
32
  py_modules = ["istftnet", "plbert", "models", "kokoro"]
33
+ module_files = download_model_files(self.model_repo, [f"v0.19/{m}.py" for m in py_modules])
34
 
35
  for module_name, file_path in zip(py_modules, module_files):
36
  load_module_from_file(module_name, file_path)
 
48
  # Download model files
49
  model_files = download_model_files(
50
  self.model_repo,
51
+ ["v0.19/kokoro-v0_19.pth", "v0.19/config.json"]
52
  )
53
  self.model_path = model_files[0] # kokoro-v0_19.pth
54