File size: 4,678 Bytes
416840c
7263dd8
f0db570
ecb2850
7263dd8
ecb2850
7263dd8
 
416840c
6b14ed6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0db570
 
7263dd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b14ed6
 
 
 
 
 
f0db570
7263dd8
 
 
6b14ed6
7263dd8
 
 
 
 
 
 
 
f0db570
7263dd8
 
6b14ed6
 
 
 
7263dd8
 
 
 
 
 
 
f0db570
6b14ed6
f0db570
 
 
 
 
 
7263dd8
f0db570
 
 
 
 
7263dd8
f0db570
 
 
 
7263dd8
 
f0db570
416840c
 
 
 
297b79a
f0db570
 
ecb2850
 
 
f0db570
416840c
7263dd8
 
 
 
416840c
 
7263dd8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr
from utils import generate_script, generate_audio, truncate_text
from prompts import SYSTEM_PROMPT
from pydub import AudioSegment
import pypdf
import os
import io
import tempfile

def estimate_audio_length(text, words_per_minute=150):
    """Estimate how long it takes to speak *text* aloud.

    The estimate is the number of whitespace-separated words divided by an
    assumed speaking rate.

    Args:
        text: The text that will be spoken.
        words_per_minute: Assumed speaking rate. Defaults to 150, the rate
            this function has always used.

    Returns:
        Estimated duration in minutes (0.0 for empty or whitespace-only
        text).

    Raises:
        ValueError: If ``words_per_minute`` is not positive.
    """
    if words_per_minute <= 0:
        raise ValueError("words_per_minute must be positive")
    word_count = len(text.split())
    return word_count / words_per_minute

def trim_dialogue(dialogue, target_length_minutes):
    """Cut a generated script down to an estimated speaking time.

    Items are kept in order while their cumulative estimated length (from
    ``estimate_audio_length``) stays within ``target_length_minutes``. The
    first item that would exceed the budget is shortened to the words that
    still fit, suffixed with "...", and ends the result. If not even one
    word of that item fits, it is dropped instead of being kept as a bare
    "..." line, which would carry no speakable text.

    Args:
        dialogue: Object exposing a ``dialogue`` iterable whose items have a
            string ``text`` attribute.
        target_length_minutes: Speaking-time budget in minutes.

    Returns:
        A list of the kept items. NOTE: the shortened item is modified in
        place (its ``text`` attribute is reassigned), so the caller's script
        object sees the change as well.
    """
    words_per_minute = 150  # same rate estimate_audio_length assumes by default
    kept_items = []
    elapsed_minutes = 0

    for item in dialogue.dialogue:
        item_minutes = estimate_audio_length(item.text)
        if elapsed_minutes + item_minutes > target_length_minutes:
            # Convert the remaining time budget back into a whole word count.
            remaining_minutes = target_length_minutes - elapsed_minutes
            words_to_keep = int(remaining_minutes * words_per_minute)
            if words_to_keep > 0:
                item.text = " ".join(item.text.split()[:words_to_keep]) + "..."
                kept_items.append(item)
            break
        kept_items.append(item)
        elapsed_minutes += item_minutes

    return kept_items

def generate_podcast(file, tone, length):
    """Turn an uploaded PDF into a podcast MP3 plus a Markdown transcript.

    Pipeline: extract the PDF text, truncate it with ``truncate_text``,
    generate a script with ``generate_script``, trim the script to the
    requested length with ``trim_dialogue``, synthesize every line with
    ``generate_audio``, and concatenate the clips into a single MP3.

    Args:
        file: The upload from the ``gr.File`` input. Either a path string or
            an object whose ``name`` attribute is the path.
        tone: Tone label forwarded unchanged to ``generate_script``.
        length: Label of the selected length option. "Short (1-2 min)" caps
            the episode at 2 minutes; any other value caps it at 5.

    Returns:
        A ``(audio_path, transcript)`` tuple: the path of a temporary MP3
        file (not deleted here) and the transcript as Markdown. The
        transcript reflects the trimmed script; the audio may additionally
        be cut at the hard length cap.

    Raises:
        gr.Error: If no PDF was supplied, the PDF cannot be read or contains
            no extractable text, or script generation, audio generation or
            audio export fails.
    """
    # No upload at all would otherwise surface as an AttributeError.
    if file is None:
        raise gr.Error("Please upload a PDF file.")
    pdf_path = file if isinstance(file, str) else file.name
    if not pdf_path.lower().endswith('.pdf'):
        raise gr.Error("Please upload a PDF file.")

    # Extract text from PDF
    try:
        pdf_reader = pypdf.PdfReader(pdf_path)
        # `or ""` guards against pages that yield no text.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        raise gr.Error(f"Error reading the PDF file: {str(e)}") from e

    # Image-only / scanned PDFs produce nothing usable for the script model.
    if not text.strip():
        raise gr.Error("No extractable text was found in the PDF file.")

    # Truncate text (truncate_text is expected to enforce the 2048-token limit)
    truncated_text = truncate_text(text)
    if len(truncated_text) < len(text):
        print("Warning: The input text was truncated to fit within 2048 tokens.")

    # Generate script
    try:
        script = generate_script(SYSTEM_PROMPT, truncated_text, tone)
    except Exception as e:
        raise gr.Error(f"Error generating script: {str(e)}") from e

    # Determine target length in minutes
    target_length = 2 if length == "Short (1-2 min)" else 5

    # Trim dialogue to fit target length
    trimmed_dialogue = trim_dialogue(script, target_length)

    # Generate audio for each dialogue item
    audio_segments = []
    transcript_parts = []
    try:
        for item in trimmed_dialogue:
            audio_file = generate_audio(item.text, item.speaker)
            try:
                audio_segments.append(AudioSegment.from_mp3(audio_file))
            finally:
                # Remove the per-line clip even when decoding it fails.
                os.remove(audio_file)
            transcript_parts.append(f"**{item.speaker}**: {item.text}\n\n")
    except Exception as e:
        raise gr.Error(f"Error generating audio: {str(e)}") from e

    # sum() of an empty list is the int 0, which has no length and cannot be
    # exported, so fail with a clear message instead.
    if not audio_segments:
        raise gr.Error("Error generating audio: the script contained no dialogue.")

    transcript = "".join(transcript_parts)

    # Combine audio segments
    combined_audio = sum(audio_segments)

    # Ensure audio doesn't exceed target length (AudioSegment slices in ms)
    target_length_ms = target_length * 60 * 1000
    if len(combined_audio) > target_length_ms:
        combined_audio = combined_audio[:target_length_ms]

    # Reserve a temporary file name, then close the handle before exporting
    # so the exporter is not writing to a file that is still held open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        temp_audio_path = temp_audio.name
    try:
        combined_audio.export(temp_audio_path, format="mp3")
    except Exception as e:
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise gr.Error(f"Error exporting audio: {str(e)}") from e

    return temp_audio_path, transcript

# Markdown shown as the interface description.
instructions = """
# Podcast Generator

Welcome to the Podcast Generator project! This tool allows you to create custom podcast episodes using AI-generated content.

## Features
* Generate podcast scripts from PDF content
* Convert text to speech for a natural listening experience
* Choose the tone of your podcast
* Export episodes as MP3 files

## How to Use
1. Upload a PDF file (content will be truncated to 2048 tokens if longer)
2. Select the desired tone (humorous, casual, formal)
3. Choose the podcast length
4. Click "Generate" to create your podcast
5. Listen to the generated audio and review the transcript

Note: This tool uses the LLaMa 3.1 70B model for script generation and gTTS for text-to-speech conversion. The input is limited to 2048 tokens to ensure compatibility with the model.
"""

# Input widgets, listed in the positional order generate_podcast receives
# them: (file, tone, length).
_pdf_input = gr.File(label="Upload PDF file", file_types=[".pdf"])
_tone_input = gr.Radio(
    ["humorous", "casual", "formal"],
    label="Select podcast tone",
    value="casual",
)
_length_input = gr.Radio(
    ["Short (1-2 min)", "Medium (3-5 min)"],
    label="Podcast length",
    value="Medium (3-5 min)",
)

# Output widgets, matching generate_podcast's (audio_path, transcript) return.
_audio_output = gr.Audio(label="Generated Podcast")
_transcript_output = gr.Markdown(label="Transcript")

iface = gr.Interface(
    fn=generate_podcast,
    inputs=[_pdf_input, _tone_input, _length_input],
    outputs=[_audio_output, _transcript_output],
    title="Custom NotebookLM-type Podcast Generator (2048 token limit)",
    description=instructions,
    allow_flagging="never",
    theme=gr.themes.Soft(),
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()