siddhartharya committed on
Commit
ecb2850
·
verified ·
1 Parent(s): 7ed5630

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -33
app.py CHANGED
@@ -1,49 +1,89 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import PyPDF2
4
  import docx
5
  import requests
 
 
 
 
 
 
6
 
7
# Text-to-speech pipeline used by generate_podcast to voice the dialogue.
# NOTE(review): confirm that "groq/llama-3.1-70b" is a valid model id for the
# "text-to-speech" task before relying on this.
model = pipeline(task="text-to-speech", model="groq/llama-3.1-70b")
9
-
10
def extract_text_from_file(file_path):
    """Return the text content of a PDF or DOCX file.

    Args:
        file_path: Path of the file to read. The type is chosen from the
            file extension, compared case-insensitively.

    Returns:
        The extracted text with pages/paragraphs separated by newlines, or
        "" when the path is empty or the extension is not .pdf/.docx.
    """
    if not file_path:
        return ""

    # Compare on a lower-cased copy so "REPORT.PDF" is recognised as well.
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Join once instead of repeated +=, and keep a separator so the
            # last word of one page does not fuse with the first of the next.
            # "or ''" keeps the join safe if a page yields no text.
            return "\n".join(page.extract_text() or "" for page in reader.pages)
    if lowered.endswith('.docx'):
        doc = docx.Document(file_path)
        return "\n".join(para.text for para in doc.paragraphs)
    return ""
23
 
24
def fetch_text_from_url(url, timeout=30):
    """Download a URL and return the response body as text.

    Args:
        url: Address to fetch. Blank or missing values are treated as
            "nothing to fetch".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        "" (also for blank URLs and for network/request errors).
    """
    if not url or not url.strip():
        return ""
    try:
        # Without a timeout a stalled server would block the request forever.
        response = requests.get(url.strip(), timeout=timeout)
    except requests.RequestException:
        # Keep the function's "" contract for unreachable or invalid URLs.
        return ""
    return response.text if response.status_code == 200 else ""
27
-
28
def generate_podcast(uploaded_file=None, url=None):
    """Turn an uploaded document or a URL into podcast audio.

    The uploaded file takes precedence over the URL. The source text is
    placed into a fixed two-line "Man"/"Woman" dialogue which is then passed
    to the module-level ``model``.

    Returns:
        Whatever ``model`` returns for the dialogue, or the string
        "No input provided." when neither input is given.
    """
    if not uploaded_file and not url:
        return "No input provided."

    source_text = (
        extract_text_from_file(uploaded_file.name)
        if uploaded_file
        else fetch_text_from_url(url)
    )
    return model(f"Man: {source_text}\nWoman: {source_text} (response)")
 
 
 
 
 
 
 
 
 
 
 
39
 
40
# Input widgets, in the order generate_podcast receives them:
# an uploaded document first, then an optional URL.
podcast_inputs = [
    gr.inputs.File(label="Upload PDF or DOC"),
    gr.inputs.Textbox(label="Or enter URL"),
]
podcast_output = gr.outputs.Audio(label="Generated Podcast")

iface = gr.Interface(
    fn=generate_podcast,
    inputs=podcast_inputs,
    outputs=podcast_output,
)

# Start the web UI as soon as the module is executed.
iface.launch()
 
1
  import gradio as gr
 
2
  import PyPDF2
3
  import docx
4
  import requests
5
+ from bs4 import BeautifulSoup
6
+ from groq import Groq
7
+ from gtts import gTTS
8
+ from pydub import AudioSegment
9
+ import os
10
+ import io
11
 
12
# Shared Groq API client. The key is read from the GROQ_API_KEY environment
# variable; indexing os.environ raises KeyError at import time if it is unset.
_groq_api_key = os.environ["GROQ_API_KEY"]
groq_client = Groq(api_key=_groq_api_key)
14
+
15
def extract_text(file_or_url):
    """Return the plain text of a web page or of an uploaded PDF/DOCX file.

    Args:
        file_or_url: Either a URL string, or an uploaded-file object that
            exposes ``name`` (its path) and optionally ``file`` (an open
            binary stream). ``None`` is accepted.

    Returns:
        The extracted text, or "" when nothing was supplied, the download
        failed, or the file extension is not .pdf/.docx.
    """
    if file_or_url is None:
        return ""

    if isinstance(file_or_url, str):  # URL
        url = file_or_url.strip()
        if not url:
            # An empty textbox arrives as ""; do not hand that to requests.
            return ""
        try:
            # Bound the wait, and treat HTTP error pages as failures rather
            # than as content to turn into a podcast.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException:
            return ""
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text()

    path = getattr(file_or_url, "name", None) or ""
    lowered = path.lower()  # match ".PDF" / ".Docx" as well
    # Prefer the already-open stream when the upload object has one;
    # otherwise fall back to the path (both readers accept either form).
    stream = getattr(file_or_url, "file", None)
    source = path if stream is None else stream

    if lowered.endswith('.pdf'):
        reader = PyPDF2.PdfReader(source)
        # "or ''" keeps the join safe if a page yields no text.
        return ' '.join(page.extract_text() or "" for page in reader.pages)
    if lowered.endswith('.docx'):
        doc = docx.Document(source)
        return ' '.join(para.text for para in doc.paragraphs)
    return ""
28
 
29
def generate_podcast_script(text, max_input_chars=12000):
    """Ask the Groq-hosted LLaMa model for a two-speaker podcast script.

    Args:
        text: Source material the hosts should discuss.
        max_input_chars: Upper bound on how much of ``text`` is placed in
            the prompt, so very long documents do not blow up the request.
            NOTE(review): tune this to the model's actual context window.

    Returns:
        The generated script as a string ("" if the model returned no
        content). Dialogue lines are requested in ``Man:`` / ``Woman:``
        form, which is what ``text_to_speech`` looks for.
    """
    source_text = (text or "")[:max_input_chars]
    prompt = "\n".join([
        "Generate a podcast script between a man and a woman discussing the following text:",
        source_text,
        "The podcast should be informative and engaging, with a natural conversation flow.",
        "Limit the script to approximately 750 words to fit within a 5-minute podcast.",
        # text_to_speech only voices lines with these exact prefixes, so the
        # format has to be requested explicitly.
        'Start every line of dialogue with either "Man:" or "Woman:" and '
        "do not use any other speaker labels or formatting.",
    ])

    response = groq_client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are an AI assistant that generates podcast scripts based on given text."},
            {"role": "user", "content": prompt},
        ],
        model="llama-3.1-70b-versatile",  # Using LLaMa 3.1 70B model
        max_tokens=1000,
        temperature=0.7,
    )
    # Normalise a missing completion to "" so callers always get a string.
    return response.choices[0].message.content or ""
45
+
46
def text_to_speech(script, max_duration_ms=300000):
    """Render a two-speaker script to a single in-memory MP3.

    Every line that starts with ``Man:`` or ``Woman:`` (leading whitespace
    and Markdown ``*`` emphasis around the label are tolerated) is
    synthesised with gTTS, using a different regional voice per speaker.
    All other lines are ignored.

    Args:
        script: The dialogue text, one utterance per line.
        max_duration_ms: Maximum length of the result in milliseconds; the
            concatenated audio is cut off after this point (default 5 min).

    Returns:
        An ``io.BytesIO`` positioned at the start of the MP3 data, or
        ``None`` when the script contains no speakable lines.
    """
    voices = (("Man:", 'co.uk'), ("Woman:", 'com.au'))
    audio_segments = []
    for raw_line in (script or "").splitlines():
        # Models often emit "**Man:** ..." or indent lines; normalise that
        # so the utterance is not silently dropped.
        line = raw_line.strip().lstrip('*').lstrip()
        for prefix, tld in voices:
            if line.startswith(prefix):
                break
        else:
            continue  # not a dialogue line
        spoken = line[len(prefix):].strip().strip('*').strip()
        if not spoken:
            continue  # nothing to say; gTTS refuses empty text
        tts = gTTS(spoken, lang='en', tld=tld)
        clip = io.BytesIO()
        tts.write_to_fp(clip)
        clip.seek(0)
        audio_segments.append(AudioSegment.from_mp3(clip))

    if not audio_segments:
        # sum([]) would yield the int 0 and the slice below would raise.
        return None

    final_audio = audio_segments[0]
    for segment in audio_segments[1:]:
        final_audio += segment
    final_audio = final_audio[:max_duration_ms]  # trim to the time budget

    buffer = io.BytesIO()
    final_audio.export(buffer, format="mp3")
    buffer.seek(0)
    return buffer
67
+
68
def generate_podcast(file_or_url, url=None):
    """Build a podcast (audio plus script) from an uploaded file or a URL.

    The Gradio interface wires two inputs to this function (file upload and
    URL textbox), so it must accept two arguments; the second is optional to
    stay compatible with single-argument callers.

    Args:
        file_or_url: Uploaded file object, or a URL string.
        url: URL from the textbox; used only when no file was supplied.

    Returns:
        A ``(audio, script)`` tuple. ``audio`` is the path of a temporary
        MP3 file, or ``None`` when no audio could be produced. On extraction
        failure the tuple is ``(None, <error message>)``.
    """
    import tempfile

    source = file_or_url if file_or_url else url
    if isinstance(source, str):
        source = source.strip()  # an untouched textbox arrives as ""

    text = extract_text(source) if source else ""
    if not text or not text.strip():
        return None, "Failed to extract text. Please check your input."

    script = generate_podcast_script(text)
    audio = text_to_speech(script)
    if audio is None or isinstance(audio, str):
        return audio, script

    # gr.Audio does not take a BytesIO as output; persist the MP3 and hand
    # back its path instead.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(audio.read())
    return tmp.name, script
75
 
76
# Input widgets: an uploaded document, or alternatively a URL.
podcast_inputs = [
    gr.File(label="Upload PDF/DOC file"),
    gr.Textbox(label="Or enter URL"),
]
# Output widgets: the rendered audio and the script it was generated from.
podcast_outputs = [
    gr.Audio(label="Generated Podcast"),
    gr.Textbox(label="Podcast Script"),
]

iface = gr.Interface(
    fn=generate_podcast,
    inputs=podcast_inputs,
    outputs=podcast_outputs,
    title="Custom NotebookLM-type Podcast Generator (LLaMa 3.1 70B)",
)

# Start the web UI as soon as the module is executed.
iface.launch()