udayl committed on
Commit
03eec30
·
1 Parent(s): 97c565c

updated app and Dockerfile

Browse files
Files changed (3) hide show
  1. Dockerfile +24 -0
  2. gradio_app.py +112 -25
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the NotebookLM-Kokoro Gradio app.
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies.
# --no-install-recommends keeps the slim image small by skipping optional
# packages; the apt lists are removed in the same layer so they never ship.
# NOTE(review): espeak-ng is presumably required by kokoro for phonemization
# -- confirm against the kokoro installation docs.
RUN apt-get update && apt-get install -y --no-install-recommends \
    espeak-ng \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached independently of
# application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY . .

# Unbuffered stdout/stderr so logs appear immediately in the container output.
ENV PYTHONUNBUFFERED=1

# Port the Gradio server listens on (see server_port in gradio_app.py).
EXPOSE 7860

# Command to run the application
CMD ["python", "gradio_app.py"]
gradio_app.py CHANGED
@@ -8,6 +8,9 @@ import numpy as np
8
  import ast
9
  import shutil
10
  import warnings
 
 
 
11
  warnings.filterwarnings("ignore")
12
 
13
  # A modified version of generate_audio_from_script to accept voice mapping
@@ -68,9 +71,17 @@ def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voic
68
  return None
69
 
70
 
71
- def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider):
72
  """Process the uploaded PDF file and generate audio"""
73
  try:
 
 
 
 
 
 
 
 
74
  # Check if we received a valid file
75
  if pdf_file is None:
76
  return "No file uploaded", None
@@ -112,45 +123,121 @@ def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider):
112
 
113
 
114
  def create_gradio_app():
115
- with gr.Blocks() as app:
116
- gr.Markdown("# NotebookLM-Kokoro TTS App")
117
- gr.Markdown("Upload a PDF, choose voices, and generate TTS audio using Kokoro.")
118
-
119
- with gr.Row():
120
- pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
 
 
 
 
 
 
121
 
122
  with gr.Row():
123
- with gr.Column():
124
- speaker1_voice = gr.Dropdown(
125
- choices=["af_heart", "af_bella", "hf_beta"],
126
- value="af_heart",
127
- label="Speaker 1 Voice"
128
- )
129
- speaker2_voice = gr.Dropdown(
130
- choices=["af_nicole", "af_heart", "bf_emma"],
131
- value="af_nicole",
132
- label="Speaker 2 Voice"
133
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  provider = gr.Radio(
135
  choices=["openai", "openrouter"],
136
  value="openrouter",
137
- label="API Provider (TTS Script Generation)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  )
139
- submit_btn = gr.Button("Generate Audio")
140
 
141
- with gr.Row():
142
- status_output = gr.Textbox(label="Status")
143
- audio_output = gr.Audio(label="Generated Audio", type="filepath")
 
 
 
 
 
 
 
144
 
145
  submit_btn.click(
146
  fn=process_pdf,
147
  inputs=[pdf_input, speaker1_voice, speaker2_voice, provider],
148
- outputs=[status_output, audio_output]
 
 
 
 
 
 
 
 
 
 
149
  )
150
 
151
  return app
152
 
153
-
154
  if __name__ == "__main__":
155
  demo = create_gradio_app()
156
- demo.launch(share=True) # add share=True to get a public URL
 
 
 
 
 
 
8
  import ast
9
  import shutil
10
  import warnings
11
+ import os
12
+ import gradio as gr
13
+ from notebook_lm_kokoro import generate_podcast_script, generate_audio_from_script
14
  warnings.filterwarnings("ignore")
15
 
16
  # A modified version of generate_audio_from_script to accept voice mapping
 
71
  return None
72
 
73
 
74
+ def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider, api_key, openrouter_base=None):
75
  """Process the uploaded PDF file and generate audio"""
76
  try:
77
+
78
+ # Set API configuration based on provider
79
+ if provider == "openai":
80
+ os.environ["OPENAI_API_KEY"] = api_key
81
+ os.environ["OPENROUTER_API_BASE"] = "https://api.openai.com/v1"
82
+ else:
83
+ os.environ["OPENAI_API_KEY"] = api_key
84
+ os.environ["OPENROUTER_API_BASE"] = openrouter_base or "https://openrouter.ai/api/v1"
85
  # Check if we received a valid file
86
  if pdf_file is None:
87
  return "No file uploaded", None
 
123
 
124
 
125
  def create_gradio_app():
126
+ # Add CSS for better styling
127
+ css = """
128
+ .gradio-container {max-width: 900px !important}
129
+ """
130
+
131
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
132
+ gr.Markdown(
133
+ """
134
+ # πŸ“š NotebookLM-Kokoro TTS App
135
+ Upload a PDF, choose voices, and generate conversational audio using Kokoro TTS.
136
+ """
137
+ )
138
 
139
  with gr.Row():
140
+ with gr.Column(scale=2):
141
+ pdf_input = gr.File(
142
+ label="Upload PDF Document",
143
+ file_types=[".pdf"],
144
+ type="filepath"
 
 
 
 
 
145
  )
146
+
147
+ with gr.Row():
148
+ speaker1_voice = gr.Dropdown(
149
+ choices=["af_heart", "af_bella", "hf_beta"],
150
+ value="af_heart",
151
+ label="Speaker 1 Voice"
152
+ )
153
+ speaker2_voice = gr.Dropdown(
154
+ choices=["af_nicole", "af_heart", "bf_emma"],
155
+ value="af_nicole",
156
+ label="Speaker 2 Voice"
157
+ )
158
+
159
  provider = gr.Radio(
160
  choices=["openai", "openrouter"],
161
  value="openrouter",
162
+ label="API Provider"
163
+ )
164
+
165
+ with gr.Group():
166
+ provider = gr.Radio(
167
+ choices=["openai", "openrouter"],
168
+ value="openrouter",
169
+ label="API Provider"
170
+ )
171
+
172
+ api_key = gr.Textbox(
173
+ label="API Key",
174
+ placeholder="Enter your API key here...",
175
+ type="password",
176
+ class_name="api-key-input"
177
+ )
178
+
179
+ openrouter_base = gr.Textbox(
180
+ label="OpenRouter Base URL (optional)",
181
+ placeholder="https://openrouter.ai/api/v1",
182
+ visible=False
183
+ )
184
+
185
+ # Show/hide OpenRouter base URL based on provider selection
186
+ def toggle_openrouter_base(provider_choice):
187
+ return gr.update(visible=provider_choice == "openrouter")
188
+
189
+ provider.change(
190
+ fn=toggle_openrouter_base,
191
+ inputs=[provider],
192
+ outputs=[openrouter_base]
193
+ )
194
+
195
+ submit_btn = gr.Button("πŸŽ™οΈ Generate Audio", variant="primary")
196
+
197
+ with gr.Column(scale=2):
198
+ status_output = gr.Textbox(
199
+ label="Status",
200
+ placeholder="Processing status will appear here..."
201
+ )
202
+ audio_output = gr.Audio(
203
+ label="Generated Audio",
204
+ type="filepath"
205
  )
 
206
 
207
+ # Examples section
208
+ gr.Examples(
209
+ examples=[
210
+ ["sample.pdf", "af_heart", "af_nicole", "openrouter"],
211
+ ],
212
+ inputs=[pdf_input, speaker1_voice, speaker2_voice, provider],
213
+ outputs=[status_output, audio_output],
214
+ fn=process_pdf,
215
+ cache_examples=True,
216
+ )
217
 
218
  submit_btn.click(
219
  fn=process_pdf,
220
  inputs=[pdf_input, speaker1_voice, speaker2_voice, provider],
221
+ outputs=[status_output, audio_output],
222
+ api_name="generate"
223
+ )
224
+
225
+ gr.Markdown(
226
+ """
227
+ ### πŸ“ Notes
228
+ - Make sure your PDF is readable and contains text (not scanned images)
229
+ - Processing large PDFs may take a few minutes
230
+ - You need a valid OpenAI/OpenRouter API key set as environment variable
231
+ """
232
  )
233
 
234
  return app
235
 
 
236
  if __name__ == "__main__":
237
  demo = create_gradio_app()
238
+ demo.queue(concurrency_count=1).launch(
239
+ server_name="0.0.0.0",
240
+ server_port=7860,
241
+ share=True,
242
+ debug=True
243
+ )
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ kokoro
2
+ soundfile
3
+ torch
4
+ PyPDF2
5
+ numpy
6
+ openai
7
+ ipython
8
+ gradio>=4.0.0