Added program files from GitHub
Browse files- Dockerfile +90 -0
- app.py +411 -0
- requirements.txt +31 -0
Dockerfile
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Parent image: the official Python 3.10.13 image. This is the full
# Debian-based variant, not the "-slim" one.
FROM python:3.10.13

# Prevent Python from writing .pyc files and buffering stdout/stderr
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Install necessary system dependencies (audio tooling plus the X11/GL/XCB
# libraries installed ahead of the Calibre installer below)
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    libsndfile1 \
    git \
    wget \
    libegl1 \
    libgl1 \
    libgl1-mesa-glx \
    libopengl0 \
    libxcb-cursor0 \
    libxcb-shape0 \
    libxcb-randr0 \
    libxcb-render0 \
    libxcb-render-util0 \
    libxcb-image0 \
    libxcb-keysyms1 \
    libxcb-glx0 \
    libxkbcommon0 \
    libxkbcommon-x11-0 \
    libx11-xcb1 \
    libxrender1 \
    libxfixes3 \
    libxdamage1 \
    libxext6 \
    libsm6 \
    libx11-6 \
    libxft2 \
    libxinerama1 \
    libxrandr2 \
    libxcomposite1 \
    libxcursor1 \
    libxi6 \
    libfontconfig1 \
    libfreetype6 \
    libssl3 \
    libxml2 \
    libxslt1.1 \
    libsqlite3-0 \
    zlib1g \
    libopenjp2-7 \
    libjpeg62-turbo \
    libpng16-16 \
    libtiff-dev \
    libwebp7 \
    poppler-utils \
    libxml2-dev \
    libxslt1-dev \
    libgtk-3-0 \
    libglib2.0-0 \
    libglib2.0-data \
    libice6 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Calibre (which includes ebook-convert and ebook-meta)
RUN wget -nv -O- https://download.calibre-ebook.com/linux-installer.sh | sh /dev/stdin

# Ensure that ebook-convert is available in PATH.
# NOTE(review): the upstream installer, when run as root, normally installs to
# /opt/calibre and symlinks its tools into /usr/bin - confirm whether this
# /root/calibre entry is still needed.
ENV PATH="/root/calibre:${PATH}"

# Set working directory
WORKDIR /app

# Create the working directories
RUN mkdir -p /app/Working_files/Book /app/Working_files/temp_ebook /app/Working_files/temp

# Copy the requirements file first so the dependency layer is cached
# independently of application code changes
COPY requirements.txt .

# Install Python dependencies (no pip cache kept in the image)
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --verbose -r requirements.txt

# Download NLTK data
RUN python -m nltk.downloader punkt

# Copy the application file
COPY app.py .

# app.py serves on port 7860 unless --port is given
EXPOSE 7860

# ENTRYPOINT runs the app; CMD is empty so `docker run <image> <args>` passes
# <args> straight through to app.py
ENTRYPOINT ["python", "app.py"]
CMD []
|
app.py
ADDED
@@ -0,0 +1,411 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import tempfile
|
4 |
+
import subprocess
|
5 |
+
import csv
|
6 |
+
from collections import OrderedDict
|
7 |
+
from importlib.resources import files
|
8 |
+
|
9 |
+
import click
|
10 |
+
import gradio as gr
|
11 |
+
import numpy as np
|
12 |
+
import soundfile as sf
|
13 |
+
import torchaudio
|
14 |
+
from cached_path import cached_path
|
15 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
16 |
+
|
17 |
+
from ebooklib import epub, ITEM_DOCUMENT
|
18 |
+
from bs4 import BeautifulSoup
|
19 |
+
import nltk
|
20 |
+
from nltk.tokenize import sent_tokenize
|
21 |
+
from pydub import AudioSegment
|
22 |
+
import magic
|
23 |
+
from mutagen.id3 import ID3, APIC, error
|
24 |
+
|
25 |
+
from f5_tts.model import DiT
|
26 |
+
from f5_tts.infer.utils_infer import (
|
27 |
+
load_vocoder,
|
28 |
+
load_model,
|
29 |
+
preprocess_ref_audio_text,
|
30 |
+
infer_process,
|
31 |
+
)
|
32 |
+
|
33 |
+
# Optional dependency: when the `spaces` package is importable, GPU-bound
# functions are wrapped with `spaces.GPU` (see `gpu_decorator`) and the
# `__main__` guard launches the app directly instead of going through the CLI.
try:
    import spaces

    USING_SPACES = True
except ImportError:
    USING_SPACES = False

DEFAULT_TTS_MODEL = "F5-TTS"
|
40 |
+
|
41 |
+
# GPU Decorator
def gpu_decorator(func):
    """Wrap *func* with ``spaces.GPU`` when the ``spaces`` package is available.

    Args:
        func: The callable to decorate.

    Returns:
        ``spaces.GPU(func)`` if ``USING_SPACES`` is true, otherwise *func*
        itself, unchanged.
    """
    if USING_SPACES:
        return spaces.GPU(func)
    return func
|
46 |
+
|
47 |
+
# Load models
# The vocoder is loaded once at import time and shared by every `infer` call.
vocoder = load_vocoder()
|
49 |
+
|
50 |
+
def load_f5tts(ckpt_path=None):
    """Load the F5-TTS model (a ``DiT`` network) from a checkpoint.

    Args:
        ckpt_path: Path to the checkpoint file. When ``None``, the default
            ``F5TTS_Base/model_1200000.safetensors`` checkpoint is resolved
            through ``cached_path`` from the ``SWivid/F5-TTS`` repository.

    Returns:
        Whatever ``load_model`` returns for the ``DiT`` class, the
        configuration below and the checkpoint path.
    """
    if ckpt_path is None:
        ckpt_path = str(cached_path("hf://SWivid/F5-TTS/F5TTS_Base/model_1200000.safetensors"))

    model_cfg = {
        "dim": 1024,
        "depth": 22,
        "heads": 16,
        "ff_mult": 2,
        "text_dim": 512,
        "conv_layers": 4,
    }
    return load_model(DiT, model_cfg, ckpt_path)
|
62 |
+
|
63 |
+
# The TTS model is loaded once at import time and reused by `infer`.
F5TTS_ema_model = load_f5tts()

# Placeholders for a chat model/tokenizer; nothing in this file assigns them.
chat_model_state = None
chat_tokenizer_state = None
|
67 |
+
|
68 |
+
@gpu_decorator
def generate_response(messages, model, tokenizer):
    """Generate a response using the provided model and tokenizer.

    Args:
        messages: Chat messages accepted by ``tokenizer.apply_chat_template``.
        model: Model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer matching *model*.

    Returns:
        The decoded text of the newly generated tokens for the first (and
        only) prompt in the batch.

    Raises:
        ValueError: If the model returns nothing, or nothing beyond the prompt.
    """
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # A text tokenizer yields `input_ids`/`attention_mask`, not
    # `input_features`; forward the whole encoding to `generate`.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )

    # `generate` returns a tensor; truth-testing a multi-element tensor raises,
    # so check the length explicitly.
    if generated_ids is None or len(generated_ids) == 0:
        raise ValueError("No generated IDs returned by the model.")

    # Drop the prompt tokens so only the newly generated continuation remains.
    new_token_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    if not new_token_ids or len(new_token_ids[0]) == 0:
        raise ValueError("Generated IDs are empty after processing.")

    return tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]
|
96 |
+
|
97 |
+
def extract_metadata_and_cover(ebook_path):
    """Extract the cover image from an eBook using Calibre's ``ebook-meta``.

    The cover is written next to the eBook, using the eBook's path with its
    extension replaced by ``.jpg``. Despite the name, only the cover is
    extracted; no other metadata is returned.

    Args:
        ebook_path: Path to the eBook file.

    Returns:
        The cover image path if that file exists after the call, else ``None``.
        Any failure (tool missing, non-zero exit, bad path) is printed and
        reported as ``None`` rather than raised.
    """
    try:
        cover_path = os.path.splitext(ebook_path)[0] + '.jpg'
        subprocess.run(['ebook-meta', ebook_path, '--get-cover', cover_path], check=True)
        if os.path.exists(cover_path):
            return cover_path
    except Exception as e:
        # Best effort: a missing cover must not abort the conversion.
        print(f"Error extracting eBook cover: {e}")
    return None
|
107 |
+
|
108 |
+
def embed_cover_into_mp3(mp3_path, cover_image_path):
    """Embed a cover image into the MP3 file's ID3 metadata.

    Existing ``APIC`` frames are removed first so the file ends up with a
    single front-cover picture. Failures while reading the image or saving
    the tags are printed, not raised.

    Args:
        mp3_path: Path to the MP3 file to tag.
        cover_image_path: Path to the image file to embed.
    """
    import mimetypes  # local import: only needed here

    try:
        audio = ID3(mp3_path)
    except error:
        # Could not load existing tags: start from an empty tag set.
        audio = ID3()

    # Remove existing APIC frames to avoid duplicates
    audio.delall("APIC")

    try:
        # Derive the MIME type from the file extension; fall back to JPEG,
        # which is what this function previously always declared.
        guessed_mime = mimetypes.guess_type(cover_image_path)[0]
        if not guessed_mime or not guessed_mime.startswith("image/"):
            guessed_mime = 'image/jpeg'

        with open(cover_image_path, 'rb') as img:
            audio.add(APIC(
                encoding=3,           # 3 is for UTF-8
                mime=guessed_mime,    # Image MIME type
                type=3,               # 3 is for front cover
                desc='Front cover',   # Description
                data=img.read(),
            ))
        # Save with ID3v2.3 for better compatibility
        audio.save(mp3_path, v2_version=3)
        print(f"Embedded cover image into {mp3_path}")
    except Exception as e:
        print(f"Failed to embed cover image into MP3: {e}")
|
132 |
+
|
133 |
+
def extract_text_and_title_from_epub(epub_path):
    """Extract the full text and the title from an EPUB file.

    Args:
        epub_path: Path to the EPUB file.

    Returns:
        A ``(full_text, title)`` tuple. ``full_text`` is the text of every
        document item joined with single spaces. ``title`` is the Dublin Core
        title when present and non-empty, otherwise the file name without its
        extension.

    Raises:
        RuntimeError: If the EPUB cannot be read.
        ValueError: If no text could be extracted.
    """
    try:
        book = epub.read_epub(epub_path)
        print(f"EPUB '{epub_path}' successfully read.")
    except Exception as e:
        raise RuntimeError(f"Failed to read EPUB file: {e}") from e

    filename_title = os.path.splitext(os.path.basename(epub_path))[0]

    try:
        metadata = book.get_metadata('DC', 'title')
        # An empty or None title in the metadata is treated like a missing
        # one, so callers always get a usable string.
        if metadata and metadata[0][0]:
            title = metadata[0][0]
            print(f"Extracted title: {title}")
        else:
            title = filename_title
            print(f"No title in metadata. Using filename: {title}")
    except Exception:
        title = filename_title
        print(f"Using filename as title: {title}")

    text_content = []
    for item in book.get_items():
        if item.get_type() != ITEM_DOCUMENT:
            continue
        try:
            soup = BeautifulSoup(item.get_content(), 'html.parser')
            text = soup.get_text(separator=' ', strip=True)
            if text:
                text_content.append(text)
            else:
                print(f"No text in document item {item.get_id()}.")
        except Exception as e:
            # One unparsable item must not discard the rest of the book.
            print(f"Error parsing document item {item.get_id()}: {e}")

    full_text = ' '.join(text_content)

    if not full_text:
        raise ValueError("No text found in EPUB file.")

    print(f"Extracted {len(full_text)} characters from EPUB.")
    return full_text, title
|
175 |
+
|
176 |
+
def convert_to_epub(input_path, output_path):
    """Convert an ebook to EPUB format using Calibre's ``ebook-convert``.

    Args:
        input_path: Path of the source eBook.
        output_path: Path of the EPUB to write. Its parent directory is
            created when missing.

    Returns:
        ``True`` on success.

    Raises:
        RuntimeError: If ``ebook-convert`` exits with a non-zero status, or on
            any other failure (for example, the tool is not installed).
    """
    try:
        output_dir = os.path.dirname(output_path)
        # A bare file name has no directory component; nothing to create.
        if output_dir:
            ensure_directory(output_dir)
        subprocess.run(['ebook-convert', input_path, output_path], check=True)
        print(f"Converted {input_path} to EPUB.")
        return True
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Error converting eBook: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error during conversion: {e}") from e
|
187 |
+
|
188 |
+
def detect_file_type(file_path):
    """Detect the MIME type of a file.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The MIME type string reported by ``magic`` for the file.

    Raises:
        RuntimeError: If detection fails for any reason.
    """
    try:
        detector = magic.Magic(mime=True)
        return detector.from_file(file_path)
    except Exception as e:
        raise RuntimeError(f"Error detecting file type: {e}") from e
|
195 |
+
|
196 |
+
def ensure_directory(directory_path):
    """Ensure that a directory exists, creating parents as needed.

    Args:
        directory_path: Directory to create. An empty string means the
            current directory and is a no-op.

    Raises:
        RuntimeError: If the directory cannot be created.
    """
    # os.makedirs("") raises; the current directory always exists.
    if directory_path == "":
        return
    try:
        os.makedirs(directory_path, exist_ok=True)
    except Exception as e:
        raise RuntimeError(f"Error creating directory {directory_path}: {e}") from e
|
202 |
+
|
203 |
+
def sanitize_filename(filename):
    """Sanitize a filename by removing invalid characters.

    The characters ``\\ / * ? : " < > |`` are removed and every space is
    replaced with an underscore.

    Args:
        filename: The raw name to clean.

    Returns:
        The sanitized name; may be empty if nothing valid remains.
    """
    sanitized = re.sub(r'[\\/*?:"<>|]', "", filename)
    return sanitized.replace(" ", "_")
|
207 |
+
|
208 |
+
def show_converted_audiobooks():
    """List all converted audiobook files.

    Looks in ``Working_files/Book`` (relative to the current working
    directory) for files ending in ``.mp3`` or ``.m4b``.

    Returns:
        A name-sorted list of audiobook paths, or the single-element list
        ``["No audiobooks found."]`` when the directory is missing or holds
        no audiobooks.
    """
    # NOTE(review): the placeholder string is returned into a gr.Files output
    # by this file's UI wiring; confirm that component tolerates a value that
    # is not an existing file path.
    output_dir = os.path.join("Working_files", "Book")
    if not os.path.isdir(output_dir):
        return ["No audiobooks found."]

    # Sorted so the listing is stable; os.listdir order is arbitrary.
    audiobook_names = sorted(
        name for name in os.listdir(output_dir) if name.endswith(('.mp3', '.m4b'))
    )
    if not audiobook_names:
        return ["No audiobooks found."]

    return [os.path.join(output_dir, name) for name in audiobook_names]
|
219 |
+
|
220 |
+
@gpu_decorator
def infer(ref_audio_orig, ref_text, gen_text, cross_fade_duration=0.15, speed=1, show_info=gr.Info, progress=gr.Progress()):
    """Perform inference to generate audio from text.

    Args:
        ref_audio_orig: Reference voice audio, passed to
            ``preprocess_ref_audio_text``.
        ref_text: Transcript of the reference audio.
        gen_text: Text to synthesize; must contain non-whitespace characters.
        cross_fade_duration: Forwarded to ``infer_process``.
        speed: Forwarded to ``infer_process``.
        show_info: Callback forwarded to the preprocessing and inference steps.
        progress: Progress tracker forwarded to ``infer_process``.

    Returns:
        ``((final_sample_rate, final_wave), ref_text)`` where ``ref_text`` is
        the value returned by the preprocessing step.

    Raises:
        ValueError: If *gen_text* is empty or only whitespace.
        RuntimeError: If preprocessing or inference fails.
    """
    # Validate the cheap precondition before the reference-audio preprocessing.
    if not gen_text or not gen_text.strip():
        raise ValueError("Generated text is empty. Please provide valid text content.")

    try:
        ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=show_info)
    except Exception as e:
        raise RuntimeError(f"Error in preprocessing reference audio and text: {e}") from e

    try:
        final_wave, final_sample_rate, _ = infer_process(
            ref_audio,
            ref_text,
            gen_text,
            F5TTS_ema_model,
            vocoder,
            cross_fade_duration=cross_fade_duration,
            speed=speed,
            show_info=show_info,
            progress=progress,
        )
    except Exception as e:
        raise RuntimeError(f"Error during inference process: {e}") from e

    return (final_sample_rate, final_wave), ref_text
|
247 |
+
|
248 |
+
@gpu_decorator
def basic_tts(ref_audio_input, ref_text_input, gen_file_input, cross_fade_duration, speed, progress=gr.Progress()):
    """Main function to convert eBooks to audiobooks.

    Each input file is converted to EPUB if needed, its text is synthesized
    with ``infer``, and the audio is exported as a 256k MP3 under
    ``Working_files/Book``. The eBook cover is embedded when one can be
    extracted.

    Args:
        ref_audio_input: Reference voice audio passed to ``infer``.
        ref_text_input: Transcript of the reference audio; may be empty/None.
        gen_file_input: List of eBook file paths to convert.
        cross_fade_duration: Passed to ``infer``.
        speed: Passed to ``infer``.
        progress: Progress tracker updated throughout the conversion.

    Returns:
        ``(last_file, audiobooks)``: the path of the last MP3 written and the
        result of ``show_converted_audiobooks()``.

    Raises:
        ValueError: If no eBook files were provided.
        FileNotFoundError: If an input file does not exist.
        Exception: Any error from the processing steps is printed and
            re-raised.
    """
    import time  # local import: only used for the fallback output name

    if not gen_file_input:
        raise ValueError("No eBook files were provided.")

    try:
        last_file = None

        num_ebooks = len(gen_file_input)
        for idx, ebook in enumerate(gen_file_input):
            progress(0, desc=f"Processing ebook {idx+1}/{num_ebooks}")
            epub_path = ebook
            if not os.path.exists(epub_path):
                raise FileNotFoundError(f"File not found: {epub_path}")

            file_type = detect_file_type(epub_path)
            if file_type != 'application/epub+zip':
                sanitized_base = sanitize_filename(os.path.splitext(os.path.basename(epub_path))[0])
                temp_epub = os.path.join("Working_files", "temp_converted", f"{sanitized_base}.epub")
                convert_to_epub(epub_path, temp_epub)
                epub_path = temp_epub

            progress(0.1, desc="Extracting text and title from EPUB")
            gen_text, ebook_title = extract_text_and_title_from_epub(epub_path)
            cover_image = extract_metadata_and_cover(epub_path)

            tmp_wav_path = None
            try:
                ref_text = ref_text_input or ""

                progress(0.2, desc="Starting inference")
                audio_out, _ = infer(
                    ref_audio_input,
                    ref_text,
                    gen_text,
                    cross_fade_duration,
                    speed,
                    progress=progress,
                )

                progress(0.8, desc="Stitching audio files")
                sample_rate, wave = audio_out
                # Reserve a temp file name, then write after the handle is
                # closed so the file is never open twice.
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
                    tmp_wav_path = tmp_wav.name
                sf.write(tmp_wav_path, wave, sample_rate)

                progress(0.9, desc="Converting to MP3")
                # Fall back to a timestamped name when the title sanitizes to
                # nothing (or is missing).
                sanitized_title = sanitize_filename(ebook_title or "") or f"audiobook_{int(time.time())}"
                tmp_mp3_path = os.path.join("Working_files", "Book", f"{sanitized_title}.mp3")
                ensure_directory(os.path.dirname(tmp_mp3_path))

                audio = AudioSegment.from_wav(tmp_wav_path)
                audio.export(tmp_mp3_path, format="mp3", bitrate="256k")

                if cover_image:
                    embed_cover_into_mp3(tmp_mp3_path, cover_image)
            finally:
                # Remove intermediate files even when a step above failed.
                for leftover in (tmp_wav_path, cover_image):
                    if leftover and os.path.exists(leftover):
                        os.remove(leftover)

            last_file = tmp_mp3_path
            progress(1, desc="Completed processing ebook")

        audiobooks = show_converted_audiobooks()

        return last_file, audiobooks

    except Exception as e:
        print(f"An error occurred: {e}")
        raise
|
315 |
+
|
316 |
+
def create_gradio_app():
    """Create and configure the Gradio application.

    Builds the upload widgets, the advanced settings, and wires the two
    buttons: "Start" runs ``basic_tts`` and "Show All Completed Audiobooks"
    runs ``show_converted_audiobooks``.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(theme=gr.themes.Ocean()) as app:
        gr.Markdown("# eBook to Audiobook with F5-TTS!")

        ref_audio_input = gr.Audio(
            label="Upload Voice File (<15 sec) or Record with Mic Icon (Ensure Natural Phrasing, Trim Silence)",
            type="filepath",
        )

        gen_file_input = gr.Files(
            label="Upload eBook or Multiple for Batch Processing (epub, mobi, pdf, txt, html)",
            file_types=[".epub", ".mobi", ".pdf", ".txt", ".html"],
            type="filepath",
            file_count="multiple",
        )

        generate_btn = gr.Button("Start", variant="primary")

        show_audiobooks_btn = gr.Button("Show All Completed Audiobooks", variant="secondary")
        audiobooks_output = gr.Files(label="Converted Audiobooks (Download Links ->)")

        player = gr.Audio(label="Play Latest Converted Audiobook", interactive=False)

        with gr.Accordion("Advanced Settings", open=False):
            ref_text_input = gr.Textbox(
                label="Reference Text (Leave Blank for Automatic Transcription)",
                lines=2,
            )
            speed_slider = gr.Slider(
                label="Speech Speed (Adjusting Can Cause Artifacts)",
                minimum=0.3,
                maximum=2.0,
                value=1.0,
                step=0.1,
            )
            cross_fade_duration_slider = gr.Slider(
                label="Cross-Fade Duration (Between Generated Audio Chunks)",
                minimum=0.0,
                maximum=1.0,
                value=0.15,
                step=0.01,
            )

        # Input order must match basic_tts's positional parameters.
        generate_btn.click(
            basic_tts,
            inputs=[
                ref_audio_input,
                ref_text_input,
                gen_file_input,
                cross_fade_duration_slider,
                speed_slider,
            ],
            outputs=[player, audiobooks_output],
            show_progress=True,  # Enable progress bar
        )

        show_audiobooks_btn.click(
            show_converted_audiobooks,
            inputs=[],
            outputs=[audiobooks_output],
        )

    return app
|
380 |
+
|
381 |
+
@click.command()
@click.option("--port", "-p", default=None, type=int, help="Port to run the app on")
@click.option("--host", "-H", default=None, help="Host to run the app on")
@click.option(
    "--share",
    "-s",
    default=False,
    is_flag=True,
    help="Share the app via Gradio share link",
)
@click.option("--api/--no-api", "-a", default=True, help="Allow API access")
def main(port, host, share, api):
    """Main entry point to launch the Gradio app.

    Args:
        port: Port to listen on; 7860 when not given.
        host: Interface to bind; "0.0.0.0" (all interfaces) when not given.
        share: Whether to create a public Gradio share link.
        api: Whether the API page is shown; disable with ``--no-api``.
    """
    app = create_gradio_app()
    print("Starting app...")
    app.queue().launch(
        # Honor the CLI options instead of hard-coded values; the previous
        # bind address remains the default.
        server_name=host or "0.0.0.0",
        server_port=port or 7860,
        share=share,
        show_api=api,
        debug=True,
    )
|
403 |
+
|
404 |
+
if __name__ == "__main__":
    import sys

    print("Arguments passed to Python:", sys.argv)
    if not USING_SPACES:
        # Regular run: parse CLI options with click.
        main()
    else:
        # `spaces` is available: launch directly with Gradio's defaults,
        # bypassing the CLI options.
        app = create_gradio_app()
        app.queue().launch(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch>=1.13.0
|
2 |
+
torchaudio>=0.13.0
|
3 |
+
accelerate>=0.33.0
|
4 |
+
bitsandbytes>0.37.0
|
5 |
+
cached_path>=1.0.0
|
6 |
+
click>=8.0
|
7 |
+
datasets>=2.0.0
|
8 |
+
ema_pytorch>=0.5.2
|
9 |
+
gradio>=5.0.0
|
10 |
+
jieba>=0.42.1
|
11 |
+
librosa>=0.9.2
|
12 |
+
matplotlib>=3.5.1
|
13 |
+
numpy<=1.26.4
|
14 |
+
pydub>=0.25.1
|
15 |
+
pypinyin>=0.46.0
|
16 |
+
safetensors>=0.3.1
|
17 |
+
soundfile>=0.10.3
|
18 |
+
tomli>=2.0.1
|
19 |
+
torchdiffeq>=0.2.3
|
20 |
+
tqdm>=4.65.0
|
21 |
+
transformers>=4.0.0
|
22 |
+
vocos==0.1.0
|
23 |
+
wandb>=0.15.0
|
24 |
+
x_transformers>=1.31.14
|
25 |
+
f5_tts @ git+https://github.com/SWivid/F5-TTS.git
|
26 |
+
ebooklib==0.17.1
|
27 |
+
beautifulsoup4>=4.11.0
|
28 |
+
pdfminer.six>=20221105
|
29 |
+
python-magic>=0.4.27
|
30 |
+
nltk>=3.6.0
|
31 |
+
mutagen>=1.45.1
|