Spaces:

udayl
/

NotebookLM-Kokoro_TTS_App

Running

App Files Files Community

udayl committed on Jul 1

Commit

03eec30

1 Parent(s): 97c565c

updated app and Dockerfile

Browse files

Files changed (3) hide show

Dockerfile +24 -0
gradio_app.py +112 -25
requirements.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,24 @@

+FROM python:3.11-slim
+WORKDIR /app
+# Install system dependencies (espeak-ng only; skip recommended packages
+# so the slim image stays small)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    espeak-ng \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first so the dependency layer is cached across code changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application files
+COPY . .
+# Unbuffered output so Python logs show up immediately in the container logs
+ENV PYTHONUNBUFFERED=1
+# Expose the port the Gradio app listens on
+EXPOSE 7860
+# Command to run the application
+CMD ["python", "gradio_app.py"]

gradio_app.py CHANGED Viewed

@@ -8,6 +8,9 @@ import numpy as np
 import ast
 import shutil
 import warnings
 warnings.filterwarnings("ignore")
 # A modified version of generate_audio_from_script to accept voice mapping
@@ -68,9 +71,17 @@ def generate_audio_from_script_with_voices(script, speaker1_voice, speaker2_voic
         return None
-def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider):
     """Process the uploaded PDF file and generate audio"""
     try:
         # Check if we received a valid file
         if pdf_file is None:
             return "No file uploaded", None
@@ -112,45 +123,121 @@ def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider):
 def create_gradio_app():
-    with gr.Blocks() as app:
-        gr.Markdown("# NotebookLM-Kokoro TTS App")
-        gr.Markdown("Upload a PDF, choose voices, and generate TTS audio using Kokoro.")
-        with gr.Row():
-            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
         with gr.Row():
-            with gr.Column():
-                speaker1_voice = gr.Dropdown(
-                    choices=["af_heart", "af_bella", "hf_beta"],
-                    value="af_heart",
-                    label="Speaker 1 Voice"
-                )
-                speaker2_voice = gr.Dropdown(
-                    choices=["af_nicole", "af_heart", "bf_emma"],
-                    value="af_nicole",
-                    label="Speaker 2 Voice"
                 )
                 provider = gr.Radio(
                     choices=["openai", "openrouter"],
                     value="openrouter",
-                    label="API Provider (TTS Script Generation)"
                 )
-                submit_btn = gr.Button("Generate Audio")
-        with gr.Row():
-            status_output = gr.Textbox(label="Status")
-            audio_output = gr.Audio(label="Generated Audio", type="filepath")
         submit_btn.click(
             fn=process_pdf,
             inputs=[pdf_input, speaker1_voice, speaker2_voice, provider],
-            outputs=[status_output, audio_output]
         )
     return app
 if __name__ == "__main__":
     demo = create_gradio_app()
-    demo.launch(share=True)  # add share=True to get a public URL

 import ast
 import shutil
 import warnings
+import os
+import gradio as gr
+from notebook_lm_kokoro import generate_podcast_script, generate_audio_from_script
 warnings.filterwarnings("ignore")
 # A modified version of generate_audio_from_script to accept voice mapping
         return None
+def process_pdf(pdf_file, speaker1_voice, speaker2_voice, provider, api_key, openrouter_base=None):
     """Process the uploaded PDF file and generate audio"""
     try:
+        # Set API configuration based on provider
+        if provider == "openai":
+            os.environ["OPENAI_API_KEY"] = api_key
+            os.environ["OPENROUTER_API_BASE"] = "https://api.openai.com/v1"
+        else:
+            os.environ["OPENAI_API_KEY"] = api_key
+            os.environ["OPENROUTER_API_BASE"] = openrouter_base or "https://openrouter.ai/api/v1"
         # Check if we received a valid file
         if pdf_file is None:
             return "No file uploaded", None
 def create_gradio_app():
+    """Build the Gradio Blocks UI for the PDF-to-audio workflow.
+
+    Lays out the PDF upload, the two voice dropdowns, the provider / API key
+    controls and the status / audio outputs, then wires the submit button to
+    ``process_pdf``.
+
+    Returns:
+        The constructed ``gr.Blocks`` application (not yet launched).
+    """
+    # Add CSS for better styling
+    css = """
+    .gradio-container {max-width: 900px !important}
+    """
+    with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
+        gr.Markdown(
+            """
+            # 📚 NotebookLM-Kokoro TTS App
+            Upload a PDF, choose voices, and generate conversational audio using Kokoro TTS.
+            """
+        )
         with gr.Row():
+            with gr.Column(scale=2):
+                pdf_input = gr.File(
+                    label="Upload PDF Document",
+                    file_types=[".pdf"],
+                    type="filepath"
                 )
+                with gr.Row():
+                    speaker1_voice = gr.Dropdown(
+                        choices=["af_heart", "af_bella", "hf_beta"],
+                        value="af_heart",
+                        label="Speaker 1 Voice"
+                    )
+                    speaker2_voice = gr.Dropdown(
+                        choices=["af_nicole", "af_heart", "bf_emma"],
+                        value="af_nicole",
+                        label="Speaker 2 Voice"
+                    )
+                # Single provider selector; a second, unwired copy used to be
+                # rendered above this group.
+                with gr.Group():
+                    provider = gr.Radio(
+                        choices=["openai", "openrouter"],
+                        value="openrouter",
+                        label="API Provider"
+                    )
+                    api_key = gr.Textbox(
+                        label="API Key",
+                        placeholder="Enter your API key here...",
+                        type="password",
+                        # Gradio components take CSS classes via elem_classes;
+                        # class_name is not an accepted keyword.
+                        elem_classes=["api-key-input"]
+                    )
+                    openrouter_base = gr.Textbox(
+                        label="OpenRouter Base URL (optional)",
+                        placeholder="https://openrouter.ai/api/v1",
+                        # The default provider is "openrouter", so the field
+                        # must start visible; the change handler only runs
+                        # after the user switches provider.
+                        visible=True
+                    )
+                    # Show/hide OpenRouter base URL based on provider selection
+                    def toggle_openrouter_base(provider_choice):
+                        return gr.update(visible=provider_choice == "openrouter")
+                    provider.change(
+                        fn=toggle_openrouter_base,
+                        inputs=[provider],
+                        outputs=[openrouter_base]
+                    )
+                submit_btn = gr.Button("🎙️ Generate Audio", variant="primary")
+            with gr.Column(scale=2):
+                status_output = gr.Textbox(
+                    label="Status",
+                    placeholder="Processing status will appear here..."
+                )
+                audio_output = gr.Audio(
+                    label="Generated Audio",
+                    type="filepath"
                 )
+        # Examples section: only pre-fills the inputs. Caching the example
+        # would run process_pdf at startup, before any API key is available.
+        gr.Examples(
+            examples=[
+                ["sample.pdf", "af_heart", "af_nicole", "openrouter"],
+            ],
+            inputs=[pdf_input, speaker1_voice, speaker2_voice, provider],
+        )
         submit_btn.click(
             fn=process_pdf,
+            # process_pdf also needs the API key and the optional base URL.
+            inputs=[
+                pdf_input,
+                speaker1_voice,
+                speaker2_voice,
+                provider,
+                api_key,
+                openrouter_base,
+            ],
+            outputs=[status_output, audio_output],
+            api_name="generate"
+        )
+        gr.Markdown(
+            """
+            ### 📝 Notes
+            - Make sure your PDF is readable and contains text (not scanned images)
+            - Processing large PDFs may take a few minutes
+            - Enter a valid OpenAI/OpenRouter API key in the API Key field above
+            """
         )
     return app
 if __name__ == "__main__":
     demo = create_gradio_app()
+    # Gradio 4 removed queue(concurrency_count=...); default_concurrency_limit=1
+    # keeps the same "one generation at a time" behaviour.
+    demo.queue(default_concurrency_limit=1).launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True,
+        debug=True
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+kokoro
+soundfile
+torch
+PyPDF2
+numpy
+openai
+ipython
+gradio>=4.0.0