File size: 4,678 Bytes
416840c 7263dd8 f0db570 ecb2850 7263dd8 ecb2850 7263dd8 416840c 6b14ed6 f0db570 7263dd8 6b14ed6 f0db570 7263dd8 6b14ed6 7263dd8 f0db570 7263dd8 6b14ed6 7263dd8 f0db570 6b14ed6 f0db570 7263dd8 f0db570 7263dd8 f0db570 7263dd8 f0db570 416840c 297b79a f0db570 ecb2850 f0db570 416840c 7263dd8 416840c 7263dd8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import gradio as gr
from utils import generate_script, generate_audio, truncate_text
from prompts import SYSTEM_PROMPT
from pydub import AudioSegment
import pypdf
import os
import io
import tempfile
def estimate_audio_length(text, words_per_minute=150):
    """Estimate how long *text* takes to speak, in minutes.

    Args:
        text: Text to be spoken; words are counted by splitting on whitespace.
        words_per_minute: Assumed speaking rate. Defaults to 150.

    Returns:
        The estimated duration in minutes as a float (0.0 for empty text).

    Raises:
        ValueError: If ``words_per_minute`` is not positive.
    """
    if words_per_minute <= 0:
        raise ValueError("words_per_minute must be positive")
    word_count = len(text.split())
    return word_count / words_per_minute
def trim_dialogue(dialogue, target_length_minutes):
    """Keep as many dialogue turns as fit in the target speaking time.

    Length is budgeted in whole words at 150 words per minute. Counting
    words as integers (rather than accumulating fractional minutes) avoids
    float rounding changing how many words of the last turn are kept.

    Args:
        dialogue: Object whose ``dialogue`` attribute is an iterable of items
            that each have a ``text`` attribute.
        target_length_minutes: Maximum estimated spoken length, in minutes.

    Returns:
        A list of the items that fit, in order. The first item that does not
        fit is cut down to the remaining word budget, gets "..." appended to
        its text (the item is modified in place), and ends the list. If no
        budget is left at that point the item is dropped instead of being
        kept as an empty "..." turn.
    """
    words_per_minute = 150
    remaining_words = int(target_length_minutes * words_per_minute)

    trimmed_dialogue = []
    for item in dialogue.dialogue:
        words = item.text.split()
        if len(words) <= remaining_words:
            trimmed_dialogue.append(item)
            remaining_words -= len(words)
            continue

        # This turn overflows the budget: keep only the part that fits.
        if remaining_words > 0:
            item.text = " ".join(words[:remaining_words]) + "..."
            trimmed_dialogue.append(item)
        break
    return trimmed_dialogue
def generate_podcast(file, tone, length):
    """Turn an uploaded PDF into a podcast MP3 and a Markdown transcript.

    Steps: extract the PDF text, truncate it with ``truncate_text``, request a
    script from ``generate_script``, trim the script with ``trim_dialogue``,
    synthesize every turn with ``generate_audio`` and join the clips.

    Args:
        file: Uploaded file object; its ``name`` attribute is used as the
            path of the PDF. ``None`` is rejected with a user-facing error.
        tone: Tone value forwarded to ``generate_script``.
        length: Length option label. "Short (1-2 min)" targets 2 minutes;
            any other value targets 5 minutes.

    Returns:
        A tuple ``(mp3_path, transcript)``: the path of a temporary MP3 file
        (created with ``delete=False``, so it is not removed here) and the
        transcript as Markdown text.

    Raises:
        gr.Error: If no PDF was supplied, the PDF cannot be read or contains
            no extractable text, script generation fails, audio generation
            fails, or the script yields no dialogue to speak.
    """
    # Validate the upload before touching it; nothing selected arrives as None.
    if file is None or not file.name.lower().endswith('.pdf'):
        raise gr.Error("Please upload a PDF file.")

    # Extract text from PDF
    try:
        pdf_reader = pypdf.PdfReader(file.name)
        # `or ""` keeps the join safe should a page yield no text.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        raise gr.Error(f"Error reading the PDF file: {str(e)}") from e

    if not text.strip():
        raise gr.Error("No extractable text was found in the PDF file.")

    # Truncate text to 2048 tokens
    truncated_text = truncate_text(text)
    if len(truncated_text) < len(text):
        print("Warning: The input text was truncated to fit within 2048 tokens.")

    # Generate script
    try:
        script = generate_script(SYSTEM_PROMPT, truncated_text, tone)
    except Exception as e:
        raise gr.Error(f"Error generating script: {str(e)}") from e

    # Determine target length in minutes
    target_length = 2 if length == "Short (1-2 min)" else 5

    # Trim dialogue to fit target length
    trimmed_dialogue = trim_dialogue(script, target_length)

    # Generate audio for each dialogue item
    audio_segments = []
    transcript_parts = []
    try:
        for item in trimmed_dialogue:
            audio_file = generate_audio(item.text, item.speaker)
            try:
                audio_segments.append(AudioSegment.from_mp3(audio_file))
            finally:
                # Remove the per-turn temp file even when decoding fails.
                os.remove(audio_file)
            transcript_parts.append(f"**{item.speaker}**: {item.text}\n\n")
    except Exception as e:
        raise gr.Error(f"Error generating audio: {str(e)}") from e

    # sum() of an empty list is the int 0, which cannot be sliced or exported.
    if not audio_segments:
        raise gr.Error("Error generating audio: the script contained no dialogue.")

    transcript = "".join(transcript_parts)

    # Combine audio segments
    combined_audio = sum(audio_segments)

    # Ensure audio doesn't exceed target length
    target_length_ms = target_length * 60 * 1000
    if len(combined_audio) > target_length_ms:
        combined_audio = combined_audio[:target_length_ms]

    # Reserve a temp path, then export after the handle is closed: an open
    # NamedTemporaryFile cannot be reopened by name on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        temp_audio_path = temp_audio.name
    combined_audio.export(temp_audio_path, format="mp3")

    return temp_audio_path, transcript
# Markdown shown as the interface description: features, usage steps and limits.
instructions = """
# Podcast Generator
Welcome to the Podcast Generator project! This tool allows you to create custom podcast episodes using AI-generated content.
## Features
* Generate podcast scripts from PDF content
* Convert text to speech for a natural listening experience
* Choose the tone of your podcast
* Export episodes as MP3 files
## How to Use
1. Upload a PDF file (content will be truncated to 2048 tokens if longer)
2. Select the desired tone (humorous, casual, formal)
3. Choose the podcast length
4. Click "Generate" to create your podcast
5. Listen to the generated audio and review the transcript
Note: This tool uses the LLaMa 3.1 70B model for script generation and gTTS for text-to-speech conversion. The input is limited to 2048 tokens to ensure compatibility with the model.
"""

# Input widgets, listed in the order generate_podcast(file, tone, length) takes them.
podcast_inputs = [
    gr.File(label="Upload PDF file", file_types=[".pdf"]),
    gr.Radio(
        ["humorous", "casual", "formal"],
        label="Select podcast tone",
        value="casual",
    ),
    gr.Radio(
        ["Short (1-2 min)", "Medium (3-5 min)"],
        label="Podcast length",
        value="Medium (3-5 min)",
    ),
]

# Output widgets, matching the (audio path, transcript) pair generate_podcast returns.
podcast_outputs = [
    gr.Audio(label="Generated Podcast"),
    gr.Markdown(label="Transcript"),
]

iface = gr.Interface(
    fn=generate_podcast,
    inputs=podcast_inputs,
    outputs=podcast_outputs,
    title="Custom NotebookLM-type Podcast Generator (2048 token limit)",
    description=instructions,
    allow_flagging="never",
    theme=gr.themes.Soft(),
)
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()