yt-summarize / app.py
hermanda's picture
Update app.py
77bc72b verified
import sys
from google import genai
import subprocess
import os
import shutil
import gradio as gr
import uuid
import subprocess
def download_subtitles(video_url):
    """Fetch English subtitles for a video with yt-dlp and return the .srt path.

    yt-dlp is invoked twice against a freshly named directory (a random UUID,
    relative to the current working directory): once requesting
    auto-generated subtitles and once requesting regular subtitles. Both runs
    skip the media download and ask for conversion to SRT.

    Args:
        video_url: URL of the video. It is passed to yt-dlp after a ``--``
            separator so that input starting with ``-`` cannot be parsed as a
            command-line option.

    Returns:
        Path of the single ``.srt`` file inside the new directory. The caller
        owns that directory and is expected to delete it when done.

    Raises:
        subprocess.CalledProcessError: If a yt-dlp run exits with a non-zero
            status.
        FileNotFoundError: If the output directory or an ``.srt`` file was not
            produced.
        RuntimeError: If more than one ``.srt`` file was produced.
    """
    directory = str(uuid.uuid4())
    try:
        # Same invocation twice, differing only in which kind of subtitles
        # is requested: auto-generated first, then regular ones.
        for subs_flag in ("--write-auto-subs", "--write-subs"):
            subprocess.run(
                [
                    "yt-dlp",
                    subs_flag,
                    "--sub-lang", "en",
                    "--convert-subs", "srt",
                    "--skip-download",
                    "-P", f"home:{directory}",
                    "--",  # end of options: the URL comes from the web form
                    video_url,
                ],
                check=True,
            )

        # Verify the directory exists
        if not os.path.isdir(directory):
            raise FileNotFoundError(f"Directory {directory} does not exist")

        # Find the .srt file in the directory
        entries = os.listdir(directory)
        print(entries)
        srt_files = [name for name in entries if name.endswith('.srt')]
        if not srt_files:
            raise FileNotFoundError(f"No .srt file found in {directory}")
        if len(srt_files) > 1:
            raise RuntimeError(f"Multiple .srt files found in {directory}")
        return os.path.join(directory, srt_files[0])
    except BaseException:
        # On any failure the caller never learns the directory name, so it
        # must be removed here or it would be left behind on disk.
        shutil.rmtree(directory, ignore_errors=True)
        raise
def cleanup_directory(folder_path):
    """Recursively delete ``folder_path`` and everything inside it.

    Args:
        folder_path: Path of the directory to remove.

    Raises:
        FileNotFoundError: If nothing exists at ``folder_path``.
    """
    if os.path.exists(folder_path):
        # Path is present: remove the whole tree and finish.
        shutil.rmtree(folder_path)
        return
    raise FileNotFoundError(f"The directory {folder_path} does not exist")
def srt_to_text(input_file):
    """Extract the caption text from an SRT file as newline-joined lines.

    The file is read as UTF-8 and split into cues at blank lines (lines that
    are empty or contain only whitespace). Within each cue the first two
    lines (the index and timing lines in an SRT file) are dropped and the
    rest are kept as text. A text line identical to the previously emitted
    line is skipped, so captions that repeat the preceding line are not
    duplicated in the output.

    Args:
        input_file: Path to the ``.srt`` file.

    Returns:
        The caption text, one line per kept caption line; an empty string if
        the file has no cue with text.

    Raises:
        FileNotFoundError: If ``input_file`` does not exist. The error is
            logged and re-raised rather than exiting the process, so a
            missing file cannot terminate the hosting application.
    """
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found")
        raise

    output_lines = []
    cue_lines = []
    # The trailing "" acts as a sentinel so the last cue is flushed too.
    for raw_line in content.strip().split("\n") + [""]:
        line = raw_line.strip()
        if line:
            cue_lines.append(line)
            continue
        # Blank line: the cue is complete. Cues with fewer than three lines
        # have no text, and the slice is simply empty for them.
        for text in cue_lines[2:]:
            if not output_lines or text != output_lines[-1]:
                output_lines.append(text)
        cue_lines = []
    return "\n".join(output_lines)
# url = "https://www.youtube.com/watch?v=B1dWbiXnz_s"
# subtitlesfile = download_subtitles(url)
# video_text = srt_to_text(subtitlesfile)
# cleanup_directory(os.path.dirname(subtitlesfile))
# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# client = genai.Client(api_key=GEMINI_API_KEY)
# response = client.models.generate_content(
# model='gemini-2.0-flash',
# contents=f"Summarize the following text chronologically, make it long, use markdown: \n{video_text}",
# )
# print(response.text)
def get_transcript_text(url):
    """Download a video's subtitles and return them as plain text.

    Calls ``download_subtitles`` to obtain an ``.srt`` file, converts it with
    ``srt_to_text``, and removes the directory that held the file.

    Args:
        url: URL of the video whose subtitles should be fetched.

    Returns:
        The subtitle text produced by ``srt_to_text``.

    Raises:
        Whatever ``download_subtitles``, ``srt_to_text`` or
        ``cleanup_directory`` raise; nothing is caught here.
    """
    print("Downloading subtitles...")
    subtitlesfile = download_subtitles(url)
    try:
        print("Extracting text from subtitles...")
        video_text = srt_to_text(subtitlesfile)
    finally:
        # Remove the download directory even when text extraction fails, so
        # failed requests do not leave directories behind.
        print("Cleaning up...")
        cleanup_directory(os.path.dirname(subtitlesfile))
    return video_text
def summarize_video(url, prompt):
    """Summarize a video's transcript with the Gemini API.

    The transcript obtained from ``get_transcript_text`` is appended to
    ``prompt`` on a new line and sent to the ``gemini-2.0-flash`` model. The
    API key is read from the ``GEMINI_API_KEY`` environment variable.

    Args:
        url: URL of the video to summarize.
        prompt: Instruction text placed before the transcript.

    Returns:
        The ``text`` attribute of the model response.
    """
    transcript = get_transcript_text(url)
    gemini = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    # Instruction first, transcript after a single newline.
    request_text = "\n".join((prompt, transcript))
    print("Generating summary...")
    result = gemini.models.generate_content(
        model='gemini-2.0-flash',
        contents=request_text,
    )
    return result.text
# Gradio UI: a URL box with a "Summarize" button beside it, an editable
# prompt box, and a Markdown area that shows the result.
# NOTE(review): the nesting below is reconstructed from statement order;
# confirm that the prompt box and output belong directly under Blocks
# rather than inside the Row.
with gr.Blocks() as app:
    gr.Markdown("# YouTube Video Summarizer")
    with gr.Row():
        # Wide column for the URL, narrow column for the button (5:1 scale).
        with gr.Column(scale=5):
            url_input = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube URL here...")
        with gr.Column(scale=1):
            summarize_btn = gr.Button("Summarize", variant="primary")
    # Initial value of the prompt box; the user can edit it before submitting.
    default_prompt = """Summarize the following text chronologically, make it long, use markdown:"""
    prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=4)
    output = gr.Markdown()
    # Clicking the button calls summarize_video(url, prompt) and renders the
    # returned value in the Markdown component.
    summarize_btn.click(
        fn=summarize_video,
        inputs=[url_input, prompt_input],
        outputs=output
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    app.launch()