jdana committed on
Commit
57b5ba6
·
1 Parent(s): 461e871

Added program files from GitHub

Browse files
Files changed (3) hide show
  1. Dockerfile +90 -0
  2. app.py +411 -0
  3. requirements.txt +31 -0
Dockerfile ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python 3.10.13 image as the parent image.
# NOTE: this is the full image, not the "-slim" variant.
FROM python:3.10.13

# Prevent Python from writing .pyc files and buffering stdout/stderr
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Install system dependencies: audio tooling (ffmpeg, libsndfile), download
# tools (git, wget) and the shared libraries installed alongside Calibre.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    libsndfile1 \
    git \
    wget \
    libegl1 \
    libgl1 \
    libgl1-mesa-glx \
    libopengl0 \
    libxcb-cursor0 \
    libxcb-shape0 \
    libxcb-randr0 \
    libxcb-render0 \
    libxcb-render-util0 \
    libxcb-image0 \
    libxcb-keysyms1 \
    libxcb-glx0 \
    libxkbcommon0 \
    libxkbcommon-x11-0 \
    libx11-xcb1 \
    libxrender1 \
    libxfixes3 \
    libxdamage1 \
    libxext6 \
    libsm6 \
    libx11-6 \
    libxft2 \
    libxinerama1 \
    libxrandr2 \
    libxcomposite1 \
    libxcursor1 \
    libxi6 \
    libfontconfig1 \
    libfreetype6 \
    libssl3 \
    libxml2 \
    libxslt1.1 \
    libsqlite3-0 \
    zlib1g \
    libopenjp2-7 \
    libjpeg62-turbo \
    libpng16-16 \
    libtiff-dev \
    libwebp7 \
    poppler-utils \
    libxml2-dev \
    libxslt1-dev \
    libgtk-3-0 \
    libglib2.0-0 \
    libglib2.0-data \
    libice6 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Calibre (which provides ebook-convert and ebook-meta)
RUN wget -nv -O- https://download.calibre-ebook.com/linux-installer.sh | sh /dev/stdin

# Ensure that the Calibre command-line tools are available in PATH.
# The upstream installer places Calibre under /opt/calibre by default.
ENV PATH="/opt/calibre:${PATH}"

# Set working directory
WORKDIR /app

# Create the working directories used by the application
RUN mkdir -p /app/Working_files/Book /app/Working_files/temp_ebook /app/Working_files/temp

# Copy the requirements file
COPY requirements.txt .

# Install Python dependencies in a single layer, without keeping pip's cache
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --verbose -r requirements.txt

# Download NLTK data
RUN python -m nltk.downloader punkt

# Copy the application file
COPY app.py .

# Run the app; extra `docker run` arguments are passed through to app.py
ENTRYPOINT ["python", "app.py"]
CMD []
app.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import tempfile
4
+ import subprocess
5
+ import csv
6
+ from collections import OrderedDict
7
+ from importlib.resources import files
8
+
9
+ import click
10
+ import gradio as gr
11
+ import numpy as np
12
+ import soundfile as sf
13
+ import torchaudio
14
+ from cached_path import cached_path
15
+ from transformers import AutoModelForCausalLM, AutoTokenizer
16
+
17
+ from ebooklib import epub, ITEM_DOCUMENT
18
+ from bs4 import BeautifulSoup
19
+ import nltk
20
+ from nltk.tokenize import sent_tokenize
21
+ from pydub import AudioSegment
22
+ import magic
23
+ from mutagen.id3 import ID3, APIC, error
24
+
25
+ from f5_tts.model import DiT
26
+ from f5_tts.infer.utils_infer import (
27
+ load_vocoder,
28
+ load_model,
29
+ preprocess_ref_audio_text,
30
+ infer_process,
31
+ )
32
+
33
+ try:
34
+ import spaces
35
+ USING_SPACES = True
36
+ except ImportError:
37
+ USING_SPACES = False
38
+
39
+ DEFAULT_TTS_MODEL = "F5-TTS"
40
+
41
+ # GPU Decorator
42
def gpu_decorator(func):
    """Wrap ``func`` with ``spaces.GPU`` when the ``spaces`` package is available.

    When ``USING_SPACES`` is false the function is returned unchanged.
    """
    return spaces.GPU(func) if USING_SPACES else func
46
+
47
+ # Load models
48
+ vocoder = load_vocoder()
49
+
50
def load_f5tts(ckpt_path=None):
    """Load the F5-TTS DiT model.

    Args:
        ckpt_path: Path to a checkpoint file. When ``None``, the base
            checkpoint is resolved through ``cached_path`` from the
            ``hf://SWivid/F5-TTS`` location.

    Returns:
        Whatever ``load_model`` returns for the DiT class, the configuration
        below and the resolved checkpoint path.
    """
    checkpoint = ckpt_path
    if checkpoint is None:
        checkpoint = str(cached_path("hf://SWivid/F5-TTS/F5TTS_Base/model_1200000.safetensors"))

    # Architecture hyper-parameters handed to load_model together with DiT.
    dit_config = dict(
        dim=1024,
        depth=22,
        heads=16,
        ff_mult=2,
        text_dim=512,
        conv_layers=4,
    )
    return load_model(DiT, dit_config, checkpoint)
62
+
63
+ F5TTS_ema_model = load_f5tts()
64
+
65
+ chat_model_state = None
66
+ chat_tokenizer_state = None
67
+
68
@gpu_decorator
def generate_response(messages, model, tokenizer):
    """Generate a response using the provided model and tokenizer.

    Args:
        messages: Chat messages passed to ``tokenizer.apply_chat_template``.
        model: Model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer exposing ``apply_chat_template``, ``__call__``
            and ``batch_decode``.

    Returns:
        The decoded text of the first generated sequence, with the prompt
        tokens stripped.

    Raises:
        ValueError: If the model returns nothing, or nothing beyond the prompt.
    """
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # Forward everything the tokenizer produced (input_ids, attention_mask).
    # A text tokenizer does not produce `input_features`, so the previous
    # `model_inputs.input_features` lookup could not succeed.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )

    # Use len() rather than truthiness: truth-testing a multi-element tensor
    # raises instead of answering "is it empty?".
    if generated_ids is None or len(generated_ids) == 0:
        raise ValueError("No generated IDs returned by the model.")

    # Drop the prompt tokens so only the newly generated tokens remain.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    if not generated_ids or len(generated_ids[0]) == 0:
        raise ValueError("Generated IDs are empty after processing.")

    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
96
+
97
def extract_metadata_and_cover(ebook_path):
    """Extract the cover image of an eBook with Calibre's ``ebook-meta``.

    The cover is written next to the eBook, using the eBook's path with its
    extension replaced by ``.jpg``.

    Returns:
        The cover path if the file exists after the command ran, else ``None``.
        Any error is printed and also results in ``None``.
    """
    found_cover = None
    try:
        stem, _extension = os.path.splitext(ebook_path)
        candidate = stem + '.jpg'
        command = ['ebook-meta', ebook_path, '--get-cover', candidate]
        subprocess.run(command, check=True)
        if os.path.exists(candidate):
            found_cover = candidate
    except Exception as exc:
        print(f"Error extracting eBook cover: {exc}")
    return found_cover
107
+
108
def embed_cover_into_mp3(mp3_path, cover_image_path):
    """Embed a cover image into the MP3 file's ID3 metadata.

    Existing ``APIC`` frames are removed first. Failures while reading the
    image or saving the tag are printed rather than raised.
    """
    try:
        tags = ID3(mp3_path)
    except error:
        # Fall back to an empty tag when the existing one cannot be loaded.
        tags = ID3()

    # Remove existing APIC frames to avoid duplicates
    tags.delall("APIC")

    try:
        with open(cover_image_path, 'rb') as image_file:
            image_bytes = image_file.read()
            cover_frame = APIC(
                encoding=3,          # 3 is for UTF-8
                mime='image/jpeg',   # Image MIME type
                type=3,              # 3 is for front cover
                desc='Front cover',  # Description
                data=image_bytes,
            )
            tags.add(cover_frame)
        # Save with ID3v2.3 for better compatibility
        tags.save(mp3_path, v2_version=3)
        print(f"Embedded cover image into {mp3_path}")
    except Exception as exc:
        print(f"Failed to embed cover image into MP3: {exc}")
132
+
133
def extract_text_and_title_from_epub(epub_path):
    """Extract the plain text and the title of an EPUB file.

    The title comes from the Dublin Core ``title`` metadata; when that is
    missing or cannot be read, the file name without extension is used.

    Returns:
        A ``(full_text, title)`` tuple, where ``full_text`` is the text of
        all document items joined with single spaces.

    Raises:
        RuntimeError: If the EPUB cannot be read.
        ValueError: If no text could be extracted.
    """
    try:
        book = epub.read_epub(epub_path)
        print(f"EPUB '{epub_path}' successfully read.")
    except Exception as exc:
        raise RuntimeError(f"Failed to read EPUB file: {exc}")

    chunks = []
    title = None

    try:
        dc_titles = book.get_metadata('DC', 'title')
        if not dc_titles:
            title = os.path.splitext(os.path.basename(epub_path))[0]
            print(f"No title in metadata. Using filename: {title}")
        else:
            title = dc_titles[0][0]
            print(f"Extracted title: {title}")
    except Exception:
        title = os.path.splitext(os.path.basename(epub_path))[0]
        print(f"Using filename as title: {title}")

    for item in book.get_items():
        # Only document items are parsed for text.
        if item.get_type() != ITEM_DOCUMENT:
            continue
        try:
            parsed = BeautifulSoup(item.get_content(), 'html.parser')
            item_text = parsed.get_text(separator=' ', strip=True)
            if not item_text:
                print(f"No text in document item {item.get_id()}.")
            else:
                chunks.append(item_text)
        except Exception as exc:
            print(f"Error parsing document item {item.get_id()}: {exc}")

    full_text = ' '.join(chunks)

    if not full_text:
        raise ValueError("No text found in EPUB file.")

    print(f"Extracted {len(full_text)} characters from EPUB.")
    return full_text, title
175
+
176
def convert_to_epub(input_path, output_path):
    """Convert an ebook to EPUB format using Calibre's ``ebook-convert``.

    Args:
        input_path: Path of the source ebook.
        output_path: Path of the EPUB to create; its parent directory is
            created when needed.

    Returns:
        ``True`` when the conversion command succeeds.

    Raises:
        RuntimeError: If ``ebook-convert`` exits with an error or anything
            else goes wrong; the original exception is chained as the cause.
    """
    try:
        output_dir = os.path.dirname(output_path)
        # A bare file name has no directory part; os.makedirs('') would fail.
        if output_dir:
            ensure_directory(output_dir)
        subprocess.run(['ebook-convert', input_path, output_path], check=True)
        print(f"Converted {input_path} to EPUB.")
        return True
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Error converting eBook: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error during conversion: {e}") from e
187
+
188
def detect_file_type(file_path):
    """Return the MIME type that ``magic`` reports for ``file_path``.

    Raises:
        RuntimeError: If the detection fails for any reason.
    """
    try:
        return magic.Magic(mime=True).from_file(file_path)
    except Exception as exc:
        raise RuntimeError(f"Error detecting file type: {exc}")
195
+
196
def ensure_directory(directory_path):
    """Create ``directory_path`` (including parents) if it does not exist.

    Raises:
        RuntimeError: If the directory cannot be created.
    """
    try:
        os.makedirs(directory_path, exist_ok=True)
    except Exception as exc:
        message = f"Error creating directory {directory_path}: {exc}"
        raise RuntimeError(message)
202
+
203
def sanitize_filename(filename):
    """Return ``filename`` with invalid characters removed and spaces as underscores.

    The characters ``\\ / * ? : " < > |`` are dropped; every space becomes
    an underscore.
    """
    without_invalid = re.sub(r'[\\/*?:"<>|]', "", filename)
    return "_".join(without_invalid.split(" "))
207
+
208
def show_converted_audiobooks():
    """List the ``.mp3`` / ``.m4b`` files in ``Working_files/Book``.

    Returns:
        A list of paths (output directory joined with each file name), or
        the single-item list ``["No audiobooks found."]`` when the directory
        is missing or holds no matching file.
    """
    book_dir = os.path.join("Working_files", "Book")

    if os.path.exists(book_dir):
        found = [
            os.path.join(book_dir, name)
            for name in os.listdir(book_dir)
            if name.endswith(('.mp3', '.m4b'))
        ]
        if found:
            return found

    return ["No audiobooks found."]
219
+
220
@gpu_decorator
def infer(ref_audio_orig, ref_text, gen_text, cross_fade_duration=0.15, speed=1, show_info=gr.Info, progress=gr.Progress()):
    """Generate audio for ``gen_text`` using the reference audio and text.

    Returns:
        ``((sample_rate, wave), ref_text)`` where ``ref_text`` is the value
        returned by ``preprocess_ref_audio_text``.

    Raises:
        RuntimeError: If preprocessing or the inference call fails.
        ValueError: If ``gen_text`` is empty after stripping whitespace.
    """
    try:
        ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
    except Exception as exc:
        raise RuntimeError(f"Error in preprocessing reference audio and text: {exc}")

    if not gen_text.strip():
        raise ValueError("Generated text is empty. Please provide valid text content.")

    # Keyword options forwarded unchanged to infer_process.
    synthesis_options = dict(
        cross_fade_duration=cross_fade_duration,
        speed=speed,
        show_info=show_info,
        progress=progress,
    )
    try:
        final_wave, final_sample_rate, _ = infer_process(
            ref_audio,
            ref_text,
            gen_text,
            F5TTS_ema_model,
            vocoder,
            **synthesis_options,
        )
    except Exception as exc:
        raise RuntimeError(f"Error during inference process: {exc}")

    return (final_sample_rate, final_wave), ref_text
247
+
248
@gpu_decorator
def basic_tts(ref_audio_input, ref_text_input, gen_file_input, cross_fade_duration, speed, progress=gr.Progress()):
    """Convert one or more uploaded eBooks into MP3 audiobooks.

    Each file is converted to EPUB when it is not one already, its text is
    synthesised with ``infer``, and the result is written as an MP3 under
    ``Working_files/Book`` (with the eBook cover embedded when one exists).

    Args:
        ref_audio_input: Reference voice audio passed to ``infer``.
        ref_text_input: Reference text; an empty string is used when falsy.
        gen_file_input: List of eBook file paths to process.
        cross_fade_duration: Cross-fade duration passed to ``infer``.
        speed: Speech speed passed to ``infer``.
        progress: Gradio progress callback.

    Returns:
        ``(last_file, audiobooks)``: the path of the last MP3 written and the
        result of ``show_converted_audiobooks()``.

    Raises:
        ValueError: If no eBook file was provided.
        FileNotFoundError: If an input file does not exist.
        Exception: Any other failure is printed and re-raised unchanged.
    """
    import time  # local import: only needed for the fallback file name

    try:
        # Fail with a clear message instead of a TypeError from len(None).
        if not gen_file_input:
            raise ValueError("No eBook files were provided.")

        last_file = None
        num_ebooks = len(gen_file_input)

        for idx, ebook in enumerate(gen_file_input):
            progress(0, desc=f"Processing ebook {idx+1}/{num_ebooks}")
            epub_path = ebook
            if not os.path.exists(epub_path):
                raise FileNotFoundError(f"File not found: {epub_path}")

            file_type = detect_file_type(epub_path)
            if file_type != 'application/epub+zip':
                sanitized_base = sanitize_filename(os.path.splitext(os.path.basename(epub_path))[0])
                temp_epub = os.path.join("Working_files", "temp_converted", f"{sanitized_base}.epub")
                convert_to_epub(epub_path, temp_epub)
                epub_path = temp_epub

            progress(0.1, desc="Extracting text and title from EPUB")
            gen_text, ebook_title = extract_text_and_title_from_epub(epub_path)
            cover_image = extract_metadata_and_cover(epub_path)

            tmp_wav_path = None
            try:
                ref_text = ref_text_input or ""

                progress(0.2, desc="Starting inference")
                audio_out, _ = infer(
                    ref_audio_input,
                    ref_text,
                    gen_text,
                    cross_fade_duration,
                    speed,
                    progress=progress,
                )

                progress(0.8, desc="Stitching audio files")
                sample_rate, wave = audio_out
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
                    tmp_wav_path = tmp_wav.name
                    sf.write(tmp_wav_path, wave, sample_rate)

                progress(0.9, desc="Converting to MP3")
                # Fall back to a timestamped name when the title is missing or
                # sanitizes to nothing. The previous fallback called int() on
                # the temp-directory path, which always raised ValueError.
                sanitized_title = sanitize_filename(ebook_title or "") or f"audiobook_{int(time.time())}"
                tmp_mp3_path = os.path.join("Working_files", "Book", f"{sanitized_title}.mp3")
                ensure_directory(os.path.dirname(tmp_mp3_path))

                audio = AudioSegment.from_wav(tmp_wav_path)
                audio.export(tmp_mp3_path, format="mp3", bitrate="256k")

                if cover_image:
                    embed_cover_into_mp3(tmp_mp3_path, cover_image)
            finally:
                # Remove the intermediate files even when a step above fails.
                for leftover in (tmp_wav_path, cover_image):
                    if leftover and os.path.exists(leftover):
                        os.remove(leftover)

            last_file = tmp_mp3_path
            progress(1, desc="Completed processing ebook")

        audiobooks = show_converted_audiobooks()

        return last_file, audiobooks

    except Exception as e:
        print(f"An error occurred: {e}")
        raise
315
+
316
def create_gradio_app():
    """Build the Gradio Blocks UI and wire its buttons to the handlers.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(theme=gr.themes.Ocean()) as app:
        gr.Markdown("# eBook to Audiobook with F5-TTS!")

        # --- Inputs -------------------------------------------------------
        ref_audio_input = gr.Audio(
            label="Upload Voice File (<15 sec) or Record with Mic Icon (Ensure Natural Phrasing, Trim Silence)",
            type="filepath",
        )
        gen_file_input = gr.Files(
            label="Upload eBook or Multiple for Batch Processing (epub, mobi, pdf, txt, html)",
            file_types=[".epub", ".mobi", ".pdf", ".txt", ".html"],
            type="filepath",
            file_count="multiple",
        )

        # --- Actions and outputs ------------------------------------------
        generate_btn = gr.Button("Start", variant="primary")
        show_audiobooks_btn = gr.Button("Show All Completed Audiobooks", variant="secondary")
        audiobooks_output = gr.Files(label="Converted Audiobooks (Download Links ->)")
        player = gr.Audio(label="Play Latest Converted Audiobook", interactive=False)

        # --- Optional tuning controls -------------------------------------
        with gr.Accordion("Advanced Settings", open=False):
            ref_text_input = gr.Textbox(
                label="Reference Text (Leave Blank for Automatic Transcription)",
                lines=2,
            )
            speed_slider = gr.Slider(
                label="Speech Speed (Adjusting Can Cause Artifacts)",
                minimum=0.3,
                maximum=2.0,
                value=1.0,
                step=0.1,
            )
            cross_fade_duration_slider = gr.Slider(
                label="Cross-Fade Duration (Between Generated Audio Chunks)",
                minimum=0.0,
                maximum=1.0,
                value=0.15,
                step=0.01,
            )

        # Order matches the positional parameters of basic_tts.
        tts_inputs = [
            ref_audio_input,
            ref_text_input,
            gen_file_input,
            cross_fade_duration_slider,
            speed_slider,
        ]
        generate_btn.click(
            basic_tts,
            inputs=tts_inputs,
            outputs=[player, audiobooks_output],
            show_progress=True,  # Enable progress bar
        )

        show_audiobooks_btn.click(
            show_converted_audiobooks,
            inputs=[],
            outputs=[audiobooks_output],
        )

    return app
380
+
381
@click.command()
@click.option("--port", "-p", default=None, type=int, help="Port to run the app on")
@click.option("--host", "-H", default=None, help="Host to run the app on")
@click.option(
    "--share",
    "-s",
    default=False,
    is_flag=True,
    help="Share the app via Gradio share link",
)
# NOTE: this flag defaults to True, so passing --api does not change anything.
@click.option("--api", "-a", default=True, is_flag=True, help="Allow API access")
def main(port, host, share, api):
    """Main entry point to launch the Gradio app."""
    app = create_gradio_app()
    print("Starting app...")
    app.queue().launch(
        # Honour --host; fall back to all interfaces as before when omitted.
        server_name=host or "0.0.0.0",
        server_port=port or 7860,
        # Honour --share instead of always creating a public share link.
        share=share,
        show_api=api,
        debug=True,
    )
403
+
404
if __name__ == "__main__":
    import sys

    print("Arguments passed to Python:", sys.argv)
    if USING_SPACES:
        # With the `spaces` package present, launch directly without the CLI.
        app = create_gradio_app()
        app.queue().launch(debug=True)
    else:
        # Otherwise go through the click command-line entry point.
        main()
requirements.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch>=1.13.0
2
+ torchaudio>=0.13.0
3
+ accelerate>=0.33.0
4
+ bitsandbytes>0.37.0
5
+ cached_path>=1.0.0
6
+ click>=8.0
7
+ datasets>=2.0.0
8
+ ema_pytorch>=0.5.2
9
+ gradio>=5.0.0
10
+ jieba>=0.42.1
11
+ librosa>=0.9.2
12
+ matplotlib>=3.5.1
13
+ numpy<=1.26.4
14
+ pydub>=0.25.1
15
+ pypinyin>=0.46.0
16
+ safetensors>=0.3.1
17
+ soundfile>=0.10.3
18
+ tomli>=2.0.1
19
+ torchdiffeq>=0.2.3
20
+ tqdm>=4.65.0
21
+ transformers>=4.0.0
22
+ vocos==0.1.0
23
+ wandb>=0.15.0
24
+ x_transformers>=1.31.14
25
+ f5_tts @ git+https://github.com/SWivid/F5-TTS.git
26
+ ebooklib==0.17.1
27
+ beautifulsoup4>=4.11.0
28
+ pdfminer.six>=20221105
29
+ python-magic>=0.4.27
30
+ nltk>=3.6.0
31
+ mutagen>=1.45.1