import os

import cv2
import gradio as gra
import youtube_dl
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

model_id = "Salesforce/blip-image-captioning-base"

# Load the BLIP captioning model and its processor once at startup.
model = BlipForConditionalGeneration.from_pretrained(model_id)
processor = BlipProcessor.from_pretrained(model_id)


def caption_youtube(url):
    # Fetch the requested video to a local file first, removing any earlier
    # download so youtube_dl does not skip the new URL as "already downloaded".
    if os.path.exists('video.mp4'):
        os.remove('video.mp4')
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    cap = cv2.VideoCapture('video.mp4')
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
    captions = []
    frame_index = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Caption roughly one frame per second; running BLIP on every frame is far too slow.
        if frame_index % fps == 0:
            # OpenCV returns BGR frames; BLIP expects an RGB PIL image.
            image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            inputs = processor(images=image, return_tensors="pt")
            out = model.generate(**inputs)
            caption = processor.decode(out[0], skip_special_tokens=True)
            print(caption)
            captions.append(caption)
        frame_index += 1

    cap.release()
    # Return the collected captions so the Gradio interface can display them.
    return "\n".join(captions)


def user_greeting(name):
    # Hello-world helper from the basic Gradio example; not used by the captioning app below.
    return "Hi! " + name + " Welcome to your first Gradio application!"


# An example video to paste into the interface.
video_url = 'https://www.youtube.com/watch?v=orbkg5JH9C8'

# youtube_dl settings used by caption_youtube: best available quality, fixed local filename.
ydl_opts = {
    'format': 'best',
    'outtmpl': 'video.mp4'
}

# A text box for the YouTube URL in, the generated captions out.
app = gra.Interface(fn=caption_youtube, inputs="text", outputs="text")
app.launch()