Spaces:
Sleeping
Sleeping
updated app and Dockerfile
Browse files- Dockerfile +24 -0
- gradio_app.py +112 -25
- requirements.txt +8 -0
Dockerfile
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Container image for the Gradio PDF-to-audio app.
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps the slim image small; the apt package lists
# are removed in the same layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    espeak-ng \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached independently
# of application source changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY . .

# Unbuffered stdout/stderr so logs show up immediately in the container log.
ENV PYTHONUNBUFFERED=1

# Port the Gradio server listens on (see server_port in gradio_app.py).
EXPOSE 7860

# Command to run the application
CMD ["python", "gradio_app.py"]
|
gradio_app.py
CHANGED
@@ -8,6 +8,9 @@ import numpy as np
|
|
8 |
import ast
|
9 |
import shutil
|
10 |
import warnings
|
|
|
|
|
|
|
11 |
warnings.filterwarnings("ignore")
|
12 |
|
13 |
# A modified version of generate_audio_from_script to accept voice mapping
|
@@ -68,9 +71,17 @@ def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voic
|
|
68 |
return None
|
69 |
|
70 |
|
71 |
-
def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider):
|
72 |
"""Process the uploaded PDF file and generate audio"""
|
73 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
# Check if we received a valid file
|
75 |
if pdf_file is None:
|
76 |
return "No file uploaded", None
|
@@ -112,45 +123,121 @@ def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider):
|
|
112 |
|
113 |
|
114 |
def create_gradio_app():
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
with gr.Row():
|
123 |
-
with gr.Column():
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
)
|
129 |
-
speaker2_voice = gr.Dropdown(
|
130 |
-
choices=["af_nicole", "af_heart", "bf_emma"],
|
131 |
-
value="af_nicole",
|
132 |
-
label="Speaker 2 Voice"
|
133 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
provider = gr.Radio(
|
135 |
choices=["openai", "openrouter"],
|
136 |
value="openrouter",
|
137 |
-
label="API Provider
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
)
|
139 |
-
submit_btn = gr.Button("Generate Audio")
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
|
145 |
submit_btn.click(
|
146 |
fn=process_pdf,
|
147 |
inputs=[pdf_input, speaker1_voice, speaker2_voice, provider],
|
148 |
-
outputs=[status_output, audio_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
)
|
150 |
|
151 |
return app
|
152 |
|
153 |
-
|
154 |
if __name__ == "__main__":
|
155 |
demo = create_gradio_app()
|
156 |
-
demo.
|
|
|
|
|
|
|
|
|
|
|
|
8 |
import ast
|
9 |
import shutil
|
10 |
import warnings
|
11 |
+
import os
|
12 |
+
import gradio as gr
|
13 |
+
from notebook_lm_kokoro import generate_podcast_script, generate_audio_from_script
|
14 |
warnings.filterwarnings("ignore")
|
15 |
|
16 |
# A modified version of generate_audio_from_script to accept voice mapping
|
|
|
71 |
return None
|
72 |
|
73 |
|
74 |
+
def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider, api_key, openrouter_base=None):
|
75 |
"""Process the uploaded PDF file and generate audio"""
|
76 |
try:
|
77 |
+
|
78 |
+
# Set API configuration based on provider
|
79 |
+
if provider == "openai":
|
80 |
+
os.environ["OPENAI_API_KEY"] = api_key
|
81 |
+
os.environ["OPENROUTER_API_BASE"] = "https://api.openai.com/v1"
|
82 |
+
else:
|
83 |
+
os.environ["OPENAI_API_KEY"] = api_key
|
84 |
+
os.environ["OPENROUTER_API_BASE"] = openrouter_base or "https://openrouter.ai/api/v1"
|
85 |
# Check if we received a valid file
|
86 |
if pdf_file is None:
|
87 |
return "No file uploaded", None
|
|
|
123 |
|
124 |
|
125 |
def create_gradio_app():
    """Build the Gradio Blocks UI for the PDF-to-audio app.

    The layout has an input column (PDF upload, two voice dropdowns, API
    provider, API key, optional OpenRouter base URL, submit button) and an
    output column (status text and the generated audio file).

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here; the caller
        is expected to call ``.queue()`` / ``.launch()`` on it.
    """
    # Add CSS for better styling
    css = """
    .gradio-container {max-width: 900px !important}
    """

    with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
        gr.Markdown(
            """
            # 🎙 NotebookLM-Kokoro TTS App
            Upload a PDF, choose voices, and generate conversational audio using Kokoro TTS.
            """
        )

        with gr.Row():
            with gr.Column(scale=2):
                pdf_input = gr.File(
                    label="Upload PDF Document",
                    file_types=[".pdf"],
                    type="filepath",
                )

                with gr.Row():
                    speaker1_voice = gr.Dropdown(
                        choices=["af_heart", "af_bella", "hf_beta"],
                        value="af_heart",
                        label="Speaker 1 Voice",
                    )
                    speaker2_voice = gr.Dropdown(
                        choices=["af_nicole", "af_heart", "bf_emma"],
                        value="af_nicole",
                        label="Speaker 2 Voice",
                    )

                # Single provider selector. A second, identical Radio used to
                # be created before this group and was immediately shadowed,
                # leaving an orphaned control on the page.
                with gr.Group():
                    provider = gr.Radio(
                        choices=["openai", "openrouter"],
                        value="openrouter",
                        label="API Provider",
                    )

                    api_key = gr.Textbox(
                        label="API Key",
                        placeholder="Enter your API key here...",
                        type="password",
                        # Gradio components take CSS classes via
                        # `elem_classes`; `class_name` is not accepted.
                        elem_classes=["api-key-input"],
                    )

                    openrouter_base = gr.Textbox(
                        label="OpenRouter Base URL (optional)",
                        placeholder="https://openrouter.ai/api/v1",
                        # Start visible because the default provider above is
                        # "openrouter"; the change handler below keeps the
                        # visibility in sync afterwards.
                        visible=True,
                    )

                # Show/hide OpenRouter base URL based on provider selection
                def toggle_openrouter_base(provider_choice):
                    return gr.update(visible=provider_choice == "openrouter")

                provider.change(
                    fn=toggle_openrouter_base,
                    inputs=[provider],
                    outputs=[openrouter_base],
                )

                submit_btn = gr.Button("🎙️ Generate Audio", variant="primary")

            with gr.Column(scale=2):
                status_output = gr.Textbox(
                    label="Status",
                    placeholder="Processing status will appear here...",
                )
                audio_output = gr.Audio(
                    label="Generated Audio",
                    type="filepath",
                )

        # Examples section.
        # Examples only pre-fill the inputs. They are deliberately not cached:
        # caching would run process_pdf at startup, which cannot succeed
        # without a user-supplied API key.
        gr.Examples(
            examples=[
                ["sample.pdf", "af_heart", "af_nicole", "openrouter"],
            ],
            inputs=[pdf_input, speaker1_voice, speaker2_voice, provider],
            cache_examples=False,
        )

        # process_pdf takes (pdf_file, speaker1_voice, speaker2_voice,
        # provider, api_key, openrouter_base), so the key and base URL
        # fields must be passed along with the other inputs.
        submit_btn.click(
            fn=process_pdf,
            inputs=[
                pdf_input,
                speaker1_voice,
                speaker2_voice,
                provider,
                api_key,
                openrouter_base,
            ],
            outputs=[status_output, audio_output],
            api_name="generate",
        )

        gr.Markdown(
            """
            ### 📝 Notes
            - Make sure your PDF is readable and contains text (not scanned images)
            - Processing large PDFs may take a few minutes
            - You need a valid OpenAI/OpenRouter API key; enter it in the API Key field above
            """
        )

    return app
|
235 |
|
|
|
236 |
if __name__ == "__main__":
    demo = create_gradio_app()
    # Gradio 4 removed `queue(concurrency_count=...)` (passing it raises);
    # `default_concurrency_limit=1` keeps the intent of processing one
    # request at a time per event listener.
    demo.queue(default_concurrency_limit=1).launch(
        # Bind to all interfaces so the server is reachable from outside the
        # container; the port matches the Dockerfile's EXPOSE.
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        debug=True,
    )
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
kokoro
|
2 |
+
soundfile
|
3 |
+
torch
|
4 |
+
PyPDF2
|
5 |
+
numpy
|
6 |
+
openai
|
7 |
+
ipython
|
8 |
+
gradio>=4.0.0
|