import os

import cv2
import gradio as gra
import youtube_dl
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

model_id = "Salesforce/blip-image-captioning-base"

# Load the BLIP captioning model and its processor once at startup.
model = BlipForConditionalGeneration.from_pretrained(model_id)
processor = BlipProcessor.from_pretrained(model_id)


def caption_youtube(url):
    # Fetch the requested video to a local file first, removing any earlier
    # download so youtube_dl does not skip the new URL as "already downloaded".
    if os.path.exists('video.mp4'):
        os.remove('video.mp4')
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    cap = cv2.VideoCapture('video.mp4')
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
    captions = []
    frame_index = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Caption roughly one frame per second; running BLIP on every frame is far too slow.
        if frame_index % fps == 0:
            # OpenCV returns BGR frames; BLIP expects an RGB PIL image.
            image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            inputs = processor(images=image, return_tensors="pt")
            out = model.generate(**inputs)
            caption = processor.decode(out[0], skip_special_tokens=True)
            print(caption)
            captions.append(caption)
        frame_index += 1

    cap.release()
    # Return the collected captions so the Gradio interface can display them.
    return "\n".join(captions)


def user_greeting(name):
    # Hello-world helper from the basic Gradio example; not used by the captioning app below.
    return "Hi! " + name + " Welcome to your first Gradio application!"


# An example video to paste into the interface.
video_url = 'https://www.youtube.com/watch?v=orbkg5JH9C8'

# youtube_dl settings used by caption_youtube: best available quality, fixed local filename.
ydl_opts = {
    'format': 'best',
    'outtmpl': 'video.mp4'
}

# A text box for the YouTube URL in, the generated captions out.
app = gra.Interface(fn=caption_youtube, inputs="text", outputs="text")
app.launch()