Spaces:

siddhartharya
/

My_NotebookLM_Podcast_Generator

Running

App Files Files Community

My_NotebookLM_Podcast_Generator / app.py

siddhartharya

Update app.py

6b14ed6 verified 10 months ago

raw

history blame

4.68 kB

	import gradio as gr
	from utils import generate_script, generate_audio, truncate_text
	from prompts import SYSTEM_PROMPT
	from pydub import AudioSegment
	import pypdf
	import os
	import io
	import tempfile

	def estimate_audio_length(text, words_per_minute=150):
	    """Estimate how long *text* takes to speak, in minutes.

	    The estimate is a plain word count divided by a speaking rate; words are
	    whatever ``str.split()`` yields (whitespace-separated tokens).

	    Args:
	        text: The text to be spoken.
	        words_per_minute: Assumed speaking rate. Defaults to 150, the rate
	            this function previously hard-coded.

	    Returns:
	        The estimated duration in minutes as a float (0.0 for empty text).

	    Raises:
	        ValueError: If ``words_per_minute`` is not positive.
	    """
	    if words_per_minute <= 0:
	        raise ValueError("words_per_minute must be positive")
	    word_count = len(text.split())
	    return word_count / words_per_minute

	def trim_dialogue(dialogue, target_length_minutes):
	    """Return the leading dialogue turns that fit in the target duration.

	    Duration is budgeted in whole words at 150 words per minute (the same
	    rate ``estimate_audio_length`` assumes). Counting words as integers
	    avoids the float rounding that could drop a word when converting the
	    remaining minutes back into a word count.

	    Turns are kept in order until one would exceed the budget. That turn is
	    cut to the remaining number of words and suffixed with ``"..."``; every
	    later turn is dropped. If no budget remains, the overflowing turn is
	    dropped entirely rather than kept as a bare ``"..."``, which would leave
	    the text-to-speech step with nothing to speak.

	    Args:
	        dialogue: Object exposing a ``dialogue`` iterable of items that each
	            have a ``text`` attribute.
	        target_length_minutes: Maximum estimated spoken length, in minutes.

	    Returns:
	        A list of the kept items. Note that the truncated item, if any, is
	        modified in place (its ``text`` is overwritten).
	    """
	    words_per_minute = 150
	    remaining_words = max(0, int(round(target_length_minutes * words_per_minute)))
	    trimmed_dialogue = []

	    for item in dialogue.dialogue:
	        words = item.text.split()
	        if len(words) <= remaining_words:
	            trimmed_dialogue.append(item)
	            remaining_words -= len(words)
	            continue

	        # This turn overflows the budget: keep only the part that fits.
	        if remaining_words > 0:
	            item.text = " ".join(words[:remaining_words]) + "..."
	            trimmed_dialogue.append(item)
	        break

	    return trimmed_dialogue

	def generate_podcast(file, tone, length):
	    """Turn an uploaded PDF into a podcast MP3 plus its transcript.

	    Pipeline: extract the PDF text, truncate it with ``truncate_text``,
	    generate a dialogue script with ``generate_script``, trim the script to
	    the requested length, synthesize each turn with ``generate_audio``,
	    concatenate the clips, cap the result at the target duration and export
	    it to a temporary ``.mp3`` file.

	    Args:
	        file: The uploaded file from the Gradio ``File`` input. Either a
	            path string or an object whose ``name`` attribute is the path
	            (which one Gradio supplies depends on its version/config).
	        tone: Tone label forwarded to ``generate_script``.
	        length: Length label from the UI; ``"Short (1-2 min)"`` selects a
	            2-minute target, anything else a 5-minute target.

	    Returns:
	        A ``(audio_path, transcript)`` tuple: the path of the exported MP3
	        and the transcript text with one ``"speaker: text"`` paragraph per
	        dialogue turn.

	    Raises:
	        gr.Error: If no PDF was supplied, the PDF cannot be read or has no
	            extractable text, or script/audio generation or export fails.
	    """
	    # Nothing uploaded: fail with a user-facing message, not AttributeError.
	    if file is None:
	        raise gr.Error("Please upload a PDF file.")

	    pdf_path = file if isinstance(file, str) else file.name
	    if not pdf_path.lower().endswith('.pdf'):
	        raise gr.Error("Please upload a PDF file.")

	    # Extract text from PDF
	    try:
	        pdf_reader = pypdf.PdfReader(pdf_path)
	        # `or ""` guards against pages for which no text comes back.
	        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
	    except Exception as e:
	        raise gr.Error(f"Error reading the PDF file: {str(e)}") from e

	    # A PDF with no text layer would otherwise send an empty prompt to the model.
	    if not text.strip():
	        raise gr.Error("No extractable text was found in the PDF file.")

	    # Truncate text to 2048 tokens
	    truncated_text = truncate_text(text)
	    if len(truncated_text) < len(text):
	        print("Warning: The input text was truncated to fit within 2048 tokens.")

	    # Generate script
	    try:
	        script = generate_script(SYSTEM_PROMPT, truncated_text, tone)
	    except Exception as e:
	        raise gr.Error(f"Error generating script: {str(e)}") from e

	    # Determine target length in minutes
	    target_length = 2 if length == "Short (1-2 min)" else 5

	    # Trim dialogue to fit target length
	    trimmed_dialogue = trim_dialogue(script, target_length)

	    # Generate audio for each dialogue item
	    audio_segments = []
	    transcript_parts = []
	    try:
	        for item in trimmed_dialogue:
	            audio_file = generate_audio(item.text, item.speaker)
	            try:
	                audio_segments.append(AudioSegment.from_mp3(audio_file))
	            finally:
	                # Always remove the per-turn temp file, even if decoding failed.
	                os.remove(audio_file)
	            transcript_parts.append(f"{item.speaker}: {item.text}\n\n")
	    except Exception as e:
	        raise gr.Error(f"Error generating audio: {str(e)}") from e

	    # sum([]) would return the int 0 and break the length check below.
	    if not audio_segments:
	        raise gr.Error("The generated script contained no dialogue to convert to audio.")

	    transcript = "".join(transcript_parts)

	    # Combine audio segments
	    combined_audio = sum(audio_segments)

	    # Ensure audio doesn't exceed target length (AudioSegment is indexed in ms)
	    target_length_ms = target_length * 60 * 1000
	    if len(combined_audio) > target_length_ms:
	        combined_audio = combined_audio[:target_length_ms]

	    # Reserve a temp path, then export after the handle is closed so the
	    # file is not written by name while still open.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
	        temp_audio_path = temp_audio.name
	    try:
	        combined_audio.export(temp_audio_path, format="mp3")
	    except Exception as e:
	        os.remove(temp_audio_path)
	        raise gr.Error(f"Error exporting audio: {str(e)}") from e

	    return temp_audio_path, transcript

	# Markdown shown to the user as the interface description (passed as
	# `description=` to gr.Interface below).
	instructions = """
	# Podcast Generator

	Welcome to the Podcast Generator project! This tool allows you to create custom podcast episodes using AI-generated content.

	## Features
	* Generate podcast scripts from PDF content
	* Convert text to speech for a natural listening experience
	* Choose the tone of your podcast
	* Export episodes as MP3 files

	## How to Use
	1. Upload a PDF file (content will be truncated to 2048 tokens if longer)
	2. Select the desired tone (humorous, casual, formal)
	3. Choose the podcast length
	4. Click "Generate" to create your podcast
	5. Listen to the generated audio and review the transcript

	Note: This tool uses the LLaMa 3.1 70B model for script generation and gTTS for text-to-speech conversion. The input is limited to 2048 tokens to ensure compatibility with the model.
	"""

	# Gradio UI wiring: the three inputs map positionally onto
	# generate_podcast(file, tone, length), and its (audio_path, transcript)
	# return value maps positionally onto the two outputs.
	iface = gr.Interface(
	    fn=generate_podcast,
	    inputs=[
	        # PDF upload; the picker is restricted to .pdf files.
	        gr.File(label="Upload PDF file", file_types=[".pdf"]),
	        # Tone choice, defaulting to "casual".
	        gr.Radio(["humorous", "casual", "formal"], label="Select podcast tone", value="casual"),
	        # Length choice; generate_podcast compares this label against
	        # "Short (1-2 min)" to pick the target duration.
	        gr.Radio(["Short (1-2 min)", "Medium (3-5 min)"], label="Podcast length", value="Medium (3-5 min)")
	    ],
	    outputs=[
	        # Receives the path of the exported MP3.
	        gr.Audio(label="Generated Podcast"),
	        # Receives the transcript text.
	        gr.Markdown(label="Transcript")
	    ],
	    title="Custom NotebookLM-type Podcast Generator (2048 token limit)",
	    description=instructions,
	    allow_flagging="never",
	    theme=gr.themes.Soft()
	)

	# Launch the Gradio app only when this file is executed as a script,
	# not when it is imported.
	if __name__ == "__main__":
	    iface.launch()