import os
import tempfile

import gradio as gr
import numpy as np
import whisper
from keybert import KeyBERT
from moviepy.editor import VideoFileClip
from PIL import Image, ImageDraw, ImageFont

# Load the Whisper and KeyBERT models once at import time so that every
# request reuses them instead of paying the load cost again.
whisper_model = whisper.load_model("base")
kw_model = KeyBERT()

# Fonts tried in order for the caption. "arial.ttf" is usually missing on
# Linux hosts, so a commonly installed alternative is tried before falling
# back to Pillow's built-in bitmap font.
_FONT_CANDIDATES = ("arial.ttf", "DejaVuSans.ttf")
_FONT_SIZE = 40


def _load_caption_font(size=_FONT_SIZE):
    """Return the first loadable TrueType font, else Pillow's default font."""
    for font_name in _FONT_CANDIDATES:
        try:
            return ImageFont.truetype(font_name, size)
        except OSError:
            # Font file not found or unreadable; try the next candidate.
            continue
    return ImageFont.load_default()


def _grab_frame(video_path, timestamp):
    """Read one frame from the video and report whether it has audio.

    Returns a ``(PIL.Image, has_audio)`` tuple. The clip is always closed so
    the underlying reader resources are released.
    """
    clip = VideoFileClip(video_path)
    try:
        duration = clip.duration
        # For clips that end before the requested timestamp, use the
        # midpoint rather than seeking past the end of the file.
        if duration is not None and timestamp >= duration:
            timestamp = duration / 2
        frame = clip.get_frame(max(timestamp, 0))
        has_audio = clip.audio is not None
    finally:
        clip.close()
    return Image.fromarray(np.uint8(frame)), has_audio


def process_video(video_path, caption="Your Caption", timestamp=5.0):
    """Create a captioned thumbnail and a keyword list for a video.

    Args:
        video_path: Filesystem path of the uploaded video.
        caption: Text drawn near the bottom-left of the thumbnail.
        timestamp: Time in seconds of the frame to use as thumbnail. If the
            clip is shorter than this, the frame at the clip midpoint is used.

    Returns:
        A ``(thumbnail_path, keywords)`` tuple: the path of the saved JPEG
        thumbnail and a comma-separated string of extracted keywords (empty
        when the video has no audio track or the transcript is blank).

    Raises:
        gr.Error: If no video was provided.
    """
    if not video_path:
        raise gr.Error("Please upload a video first.")

    image, has_audio = _grab_frame(video_path, timestamp)

    # Draw the caption; clamp the y-coordinate so it stays on small frames.
    draw = ImageDraw.Draw(image)
    text_position = (50, max(image.height - 100, 0))
    draw.text(
        text_position,
        caption or "",
        (255, 255, 255),
        font=_load_caption_font(),
    )

    # Write to a unique temp file so concurrent requests do not overwrite
    # each other's thumbnail.
    fd, thumbnail_path = tempfile.mkstemp(suffix=".jpg")
    os.close(fd)
    image.save(thumbnail_path)

    # Transcribe the audio and extract keywords from the transcript.
    keywords_list = []
    if has_audio:
        result = whisper_model.transcribe(video_path)
        text = result["text"]
        if text.strip():
            keywords = kw_model.extract_keywords(
                text,
                keyphrase_ngram_range=(1, 2),
                stop_words='english',
            )
            keywords_list = [kw[0] for kw in keywords]

    return thumbnail_path, ", ".join(keywords_list)


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Video Thumbnail Generator with SEO Keywords")
    video_input = gr.File(label="Upload Video", type="filepath")
    caption_input = gr.Textbox(
        label="Enter Caption for Thumbnail",
        value="Awesome Video!",
    )
    generate_button = gr.Button("Generate Thumbnail & Keywords")
    thumbnail_output = gr.Image(label="Generated Thumbnail")
    keywords_output = gr.Textbox(label="SEO Keywords")
    generate_button.click(
        process_video,
        inputs=[video_input, caption_input],
        outputs=[thumbnail_output, keywords_output],
    )

# Launch in Hugging Face Spaces
demo.launch()