# Page header captured with the source: "Spaces: Sleeping"
import sys | |
from google import genai | |
import subprocess | |
import os | |
import shutil | |
import gradio as gr | |
import uuid | |
import subprocess | |
def download_subtitles(video_url):
    """Download English subtitles for a video with yt-dlp as an .srt file.

    yt-dlp is invoked twice with the same output directory: once asking for
    auto-generated subtitles and once asking for regular subtitles. The video
    itself is never downloaded (``--skip-download``).

    Args:
        video_url: URL of the video, passed to yt-dlp verbatim.

    Returns:
        Path of the single ``.srt`` file inside a freshly named (uuid4)
        directory. The caller is responsible for removing that directory.

    Raises:
        subprocess.CalledProcessError: If a yt-dlp invocation exits non-zero.
        FileNotFoundError: If the output directory or an ``.srt`` file in it
            is missing after the downloads.
        RuntimeError: If more than one ``.srt`` file was produced.
    """
    directory = str(uuid.uuid4())
    shared_args = [
        "--sub-lang", "en",
        "--convert-subs", "srt",
        "--skip-download",
        "-P", f"home:{directory}",
        # "--" ends option parsing, so a user-supplied value that starts with
        # "-" is treated as a URL and can never be read as a yt-dlp option.
        "--",
        video_url,
    ]

    try:
        # Auto-generated subtitles first, then regular subtitles.
        for subs_flag in ("--write-auto-subs", "--write-subs"):
            subprocess.run(["yt-dlp", subs_flag, *shared_args], check=True)

        if not os.path.isdir(directory):
            raise FileNotFoundError(f"Directory {directory} does not exist")

        entries = os.listdir(directory)
        print(entries)
        srt_files = [name for name in entries if name.endswith(".srt")]
        if not srt_files:
            raise FileNotFoundError(f"No .srt file found in {directory}")
        if len(srt_files) > 1:
            raise RuntimeError(f"Multiple .srt files found in {directory}")
    except Exception:
        # Nobody else knows this directory's name, so remove it here rather
        # than leaking it when the download or the checks fail.
        shutil.rmtree(directory, ignore_errors=True)
        raise

    return os.path.join(directory, srt_files[0])
def cleanup_directory(folder_path):
    """Delete *folder_path* together with everything stored beneath it.

    Args:
        folder_path: Path of the directory to remove.

    Raises:
        FileNotFoundError: If nothing exists at *folder_path*.
    """
    if os.path.exists(folder_path):
        # Recursive delete of the directory tree.
        shutil.rmtree(folder_path)
        return
    raise FileNotFoundError(f"The directory {folder_path} does not exist")
def srt_to_text(input_file):
    """Extract the spoken text from an SRT subtitle file.

    The file is split into entries separated by blank (or whitespace-only)
    lines. In each entry the first two lines (cue number and timestamp) are
    dropped and the remaining lines are kept as text. A line identical to the
    previously emitted line is skipped, which removes the consecutive
    repetitions found in rolling captions.

    Args:
        input_file: Path of the UTF-8 encoded ``.srt`` file.

    Returns:
        The subtitle text, one line per kept subtitle line, joined by "\\n".

    Raises:
        FileNotFoundError: If *input_file* does not exist. The error is
            propagated instead of terminating the interpreter, so a hosting
            application can report it.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        content = f.read()

    # Group non-blank lines into entries; any blank or whitespace-only line
    # ends the current entry.
    entries = []
    current = []
    for raw_line in content.split("\n"):
        line = raw_line.strip()
        if line:
            current.append(line)
        elif current:
            entries.append(current)
            current = []
    if current:
        entries.append(current)

    output_lines = []
    for entry in entries:
        # A usable entry has a cue number, a timestamp and at least one line
        # of text.
        if len(entry) < 3:
            continue
        for text_line in entry[2:]:
            if not output_lines or text_line != output_lines[-1]:
                output_lines.append(text_line)
    return "\n".join(output_lines)
# url = "https://www.youtube.com/watch?v=B1dWbiXnz_s" | |
# subtitlesfile = download_subtitles(url) | |
# video_text = srt_to_text(subtitlesfile) | |
# cleanup_directory(os.path.dirname(subtitlesfile)) | |
# GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") | |
# client = genai.Client(api_key=GEMINI_API_KEY) | |
# response = client.models.generate_content( | |
# model='gemini-2.0-flash', | |
# contents=f"Summarize the following text chronologically, make it long, use markdown: \n{video_text}",
# ) | |
# print(response.text) | |
def get_transcript_text(url):
    """Return the subtitle text of the video at *url*.

    Downloads the subtitles, converts the resulting SRT file to plain text
    and removes the download directory afterwards.

    Args:
        url: URL of the video, forwarded to ``download_subtitles``.

    Returns:
        The transcript text produced by ``srt_to_text``.

    Raises:
        Whatever ``download_subtitles``, ``srt_to_text`` or
        ``cleanup_directory`` raise; nothing is caught here.
    """
    print("Downloading subtitles...")
    subtitlesfile = download_subtitles(url)
    try:
        print("Extracting text from subtitles...")
        return srt_to_text(subtitlesfile)
    finally:
        # Remove the download directory even when text extraction fails, so
        # failed requests do not leave directories behind.
        print("Cleaning up...")
        cleanup_directory(os.path.dirname(subtitlesfile))
def summarize_video(url, prompt):
    """Summarize a video's subtitles with the Gemini model.

    The transcript is fetched with ``get_transcript_text``, appended to
    *prompt* after a newline, and sent to the ``gemini-2.0-flash`` model.

    Args:
        url: URL of the video; surrounding whitespace is ignored.
        prompt: Instruction text placed before the transcript.

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        gr.Error: If *url* is empty or contains only whitespace.
    """
    url = (url or "").strip()
    if not url:
        # Fail with a readable message instead of letting the downloader
        # abort on an empty argument.
        raise gr.Error("Please enter a YouTube URL.")

    video_text = get_transcript_text(url)
    client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    final_prompt = (prompt or "") + "\n" + video_text
    print("Generating summary...")
    response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=final_prompt,
    )
    return response.text
# Gradio UI definition: a URL box with a "Summarize" button, an editable
# prompt, and a Markdown area for the result.
# NOTE(review): the source reached review without indentation; the nesting
# below (Row holding the two Columns, the remaining components directly under
# Blocks) is the assumed layout - confirm against the deployed app.
with gr.Blocks() as app:
    gr.Markdown("# YouTube Video Summarizer")
    with gr.Row():
        with gr.Column(scale=5):
            url_input = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube URL here...")
        with gr.Column(scale=1):
            summarize_btn = gr.Button("Summarize", variant="primary")
    # Initial value of the prompt box; the user can edit it before summarizing.
    default_prompt = """Summarize the following text chronologically, make it long, use markdown:"""
    prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=4)
    output = gr.Markdown()
    # Clicking the button calls summarize_video(url, prompt) and shows its
    # return value in the Markdown component.
    summarize_btn.click(
        fn=summarize_video,
        inputs=[url_input, prompt_input],
        outputs=output
    )
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    app.launch()