# app.py -- source of a Hugging Face Space, exported from the file viewer page.
# Last commit: "Update app.py" by siddhartharya (83b685c, verified); file size 2.91 kB.
import io
import os
import tempfile

import docx
import gradio as gr
import PyPDF2
import requests
from bs4 import BeautifulSoup
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
# Module-wide Groq API client. The key is looked up in the GROQ_API_KEY
# environment variable, which is also where the SDK looks when no key is given.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def extract_text(file_or_url):
    """Return the plain text of a web page or of an uploaded PDF/DOCX file.

    Args:
        file_or_url: Either a URL string, an uploaded-file object exposing
            ``.name`` (the file name) and ``.file`` (a binary stream), or
            ``None``.

    Returns:
        The extracted text, or ``""`` when nothing could be extracted
        (no input, blank URL, failed HTTP request, unsupported file type).
    """
    if file_or_url is None:
        return ""

    if isinstance(file_or_url, str):  # URL
        # NOTE(review): some Gradio versions hand uploads over as a path
        # string; such a value would land in this branch -- confirm against
        # the installed Gradio version.
        url = file_or_url.strip()
        if not url:
            # An empty textbox is "no input", not a URL to fetch.
            return ""
        try:
            # Without a timeout a stalled server would block the request forever.
            response = requests.get(url, timeout=30)
        except requests.RequestException:
            return ""
        if not response.ok:
            # Do not turn a 4xx/5xx error page into podcast material.
            return ""
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()

    # Compare extensions case-insensitively so "REPORT.PDF" is accepted too.
    filename = file_or_url.name.lower()
    if filename.endswith('.pdf'):
        reader = PyPDF2.PdfReader(file_or_url.file)
        # extract_text() may yield None for pages without a text layer;
        # substitute "" so the join cannot fail.
        return ' '.join(page.extract_text() or "" for page in reader.pages)
    if filename.endswith('.docx'):
        doc = docx.Document(file_or_url.file)
        return ' '.join(para.text for para in doc.paragraphs)
    return ""
def generate_podcast_script(
    text,
    *,
    model="llama-3.1-70b-versatile",
    max_tokens=1000,
    temperature=0.7,
):
    """Ask the Groq chat model for a two-host podcast script about ``text``.

    Args:
        text: Source material the hosts should discuss.
        model: Groq model identifier passed to the chat-completions call.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature for the completion.

    Returns:
        The generated script as a string (``""`` if the model returned no
        content).
    """
    prompt = (
        "Generate a podcast script between a man and a woman discussing the following text:\n"
        f"{text}\n"
        "The podcast should be informative and engaging, with a natural conversation flow.\n"
        "Limit the script to approximately 750 words to fit within a 5-minute podcast.\n"
        # The speech synthesis step only voices lines that begin with these
        # exact labels, so the required format is spelled out for the model.
        'Start every line of dialogue with the speaker label "Man:" or "Woman:" '
        "written as plain text, with no markdown, stage directions, or other prefixes."
    )
    response = groq_client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are an AI assistant that generates podcast scripts based on given text."},
            {"role": "user", "content": prompt},
        ],
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # The message content may be None; always hand a string back to callers.
    return response.choices[0].message.content or ""
def text_to_speech(script, max_duration_ms=300000):
    """Render a "Man:" / "Woman:" dialogue script to a single MP3 stream.

    Each line that starts with a known speaker label is synthesized with
    gTTS, using a different ``tld`` per speaker so the two voices differ.
    Lines without a speaker label are ignored.

    Args:
        script: The dialogue, one utterance per line.
        max_duration_ms: Maximum length of the result in milliseconds; the
            combined audio is cut off after this point (default: 5 minutes).

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that holds the MP3 data.

    Raises:
        ValueError: If the script contains no non-empty "Man:" or "Woman:"
            line, i.e. there is nothing to synthesize.
    """
    # Speaker label -> gTTS top-level domain used for that speaker's voice.
    voices = {"Man:": 'co.uk', "Woman:": 'com.au'}

    audio_segments = []
    for raw_line in script.split('\n'):
        # Tolerate indentation and markdown emphasis around the label
        # (e.g. "  **Man:** Hello"), which chat models frequently emit.
        line = raw_line.strip().lstrip('*').lstrip()
        label = next((lbl for lbl in voices if line.startswith(lbl)), None)
        if label is None:
            continue
        speech = line[len(label):].strip(' \t*')
        if not speech:
            # A bare label has nothing to speak; skip it instead of handing
            # gTTS an empty string.
            continue
        tts = gTTS(speech, lang='en', tld=voices[label])
        clip = io.BytesIO()
        tts.write_to_fp(clip)
        clip.seek(0)
        audio_segments.append(AudioSegment.from_mp3(clip))

    if not audio_segments:
        # sum([]) would be the int 0, which cannot be sliced or exported.
        raise ValueError(
            'No lines starting with "Man:" or "Woman:" were found in the script.'
        )

    final_audio = sum(audio_segments)
    final_audio = final_audio[:max_duration_ms]  # Trim to the maximum duration
    buffer = io.BytesIO()
    final_audio.export(buffer, format="mp3")
    buffer.seek(0)
    return buffer
def generate_podcast(file_or_url, url=None):
    """Turn an uploaded document or a web page into a podcast.

    The Gradio interface supplies two inputs (a file upload and a URL
    textbox), so this handler accepts both; the upload wins when both are
    given.

    Args:
        file_or_url: The uploaded file object, a URL string, or ``None``.
        url: Optional URL string, used only when ``file_or_url`` is ``None``.

    Returns:
        A ``(audio, script)`` tuple. ``audio`` is the path of a temporary MP3
        file, or ``None`` when no text could be extracted, in which case
        ``script`` carries an error message instead.
    """
    source = file_or_url
    if source is None and url is not None and url.strip():
        source = url.strip()

    text = extract_text(source)
    if not text or not text.strip():
        return None, "Failed to extract text. Please check your input."

    script = generate_podcast_script(text)
    audio_file = text_to_speech(script)

    # gr.Audio cannot display an in-memory stream; it expects a file path
    # (or raw bytes / a sample-rate+array tuple), so persist the MP3 first.
    if isinstance(audio_file, io.BytesIO):
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            tmp.write(audio_file.getvalue())
        audio_file = tmp.name
    return audio_file, script
# Gradio UI: a file upload and a URL textbox go in, the synthesized audio
# and the generated script come out.
_podcast_inputs = [
    gr.File(label="Upload PDF/DOC file"),
    gr.Textbox(label="Or enter URL"),
]
_podcast_outputs = [
    gr.Audio(label="Generated Podcast"),
    gr.Textbox(label="Podcast Script"),
]

iface = gr.Interface(
    fn=generate_podcast,
    inputs=_podcast_inputs,
    outputs=_podcast_outputs,
    title="Custom NotebookLM-type Podcast Generator (LLaMa 3.1 70B)",
)

# Start the web server as soon as the module is executed.
iface.launch()