Spaces:

ardha27
/

VideoAnalyzer

Runtime error

App Files Files Community

VideoAnalyzer / app.py

Zeph27

process time

e274fdd 11 months ago

raw

history blame

2.77 kB

	import gradio as gr
	import torch
	from transformers import AutoModel, AutoTokenizer
	from PIL import Image
	from decord import VideoReader, cpu
	import base64
	import io
	import spaces
	import time

	# Checkpoint identifier passed to both the model and tokenizer loaders.
	model_path = "openbmb/MiniCPM-V-2_6"

	# Upper bound on how many sampled frames of one video are kept.
	MAX_NUM_FRAMES = 64

	# Load the weights in bfloat16, move them to the GPU, and switch to eval mode.
	# trust_remote_code=True lets transformers run the modelling code shipped
	# with the checkpoint.
	model = AutoModel.from_pretrained(
	    model_path,
	    trust_remote_code=True,
	    torch_dtype=torch.bfloat16,
	).to(device="cuda")
	model.eval()

	tokenizer = AutoTokenizer.from_pretrained(
	    model_path,
	    trust_remote_code=True,
	)

	def encode_image(image):
	    """Return *image* as a PIL image whose longest side is at most 7168 px.

	    Args:
	        image: Either a ``PIL.Image.Image`` or anything ``Image.open``
	            accepts. Non-image inputs are opened and converted to RGB;
	            existing PIL images are used as they are.

	    Returns:
	        The image itself when it already fits, otherwise a bicubic-resized
	        copy with the longest side set to ``448 * 16`` and the other side
	        scaled proportionally (truncated to an int).
	    """
	    longest_allowed = 448 * 16

	    if not isinstance(image, Image.Image):
	        image = Image.open(image).convert("RGB")

	    width, height = image.size
	    if max(width, height) <= longest_allowed:
	        return image

	    # Pin the longer side to the limit and scale the shorter one to match.
	    if width > height:
	        target_size = (longest_allowed, int(height * longest_allowed / width))
	    else:
	        target_size = (int(width * longest_allowed / height), longest_allowed)

	    return image.resize(target_size, resample=Image.BICUBIC)

	def encode_video(video_path):
	    """Sample frames from a video and return them as size-limited PIL images.

	    One frame is taken every ``round(average fps)`` frames (about one per
	    second of footage), starting at frame 0. Only the first
	    ``MAX_NUM_FRAMES`` sampled frames are kept, so longer videos are
	    truncated rather than resampled.

	    Args:
	        video_path: Path of the video file handed to ``VideoReader``.

	    Returns:
	        A list of PIL images, each passed through ``encode_image``.
	    """
	    vr = VideoReader(video_path, ctx=cpu(0))

	    # Clamp the stride to at least 1: an average fps below 0.5 rounds to 0,
	    # and range() raises ValueError for a zero step.
	    frame_step = max(1, round(vr.get_avg_fps()))
	    frame_idx = list(range(0, len(vr), frame_step))[:MAX_NUM_FRAMES]

	    frames = vr.get_batch(frame_idx).asnumpy()
	    return [
	        encode_image(Image.fromarray(frame.astype('uint8')))
	        for frame in frames
	    ]

	@spaces.GPU
	def analyze_video(prompt, video):
	    """Run the model on a video with a text prompt and report the timing.

	    Args:
	        prompt: Text instruction placed before the video frames in the
	            user message.
	        video: Either a path string or an object exposing the path as
	            ``.name``.

	    Returns:
	        A string containing the model response followed by the elapsed
	        processing time in seconds.

	    Raises:
	        gr.Error: If no video was supplied.
	    """
	    # Without this guard a missing upload fails on ``video.name`` with an
	    # opaque AttributeError; gr.Error surfaces a readable message in the UI.
	    if video is None:
	        raise gr.Error("Please upload a video before running the analysis.")

	    # perf_counter is monotonic, so the reported duration cannot be skewed
	    # by system clock adjustments.
	    start_time = time.perf_counter()

	    video_path = video if isinstance(video, str) else video.name
	    encoded_video = encode_video(video_path)

	    # Single user turn: the prompt followed by every sampled frame.
	    context = [
	        {"role": "user", "content": [prompt] + encoded_video}
	    ]

	    params = {
	        'sampling': True,
	        'top_p': 0.8,
	        'top_k': 100,
	        'temperature': 0.7,
	        'repetition_penalty': 1.05,
	        "max_new_tokens": 2048,
	        "max_inp_length": 4352,
	        "use_image_id": False,
	        # Use fewer slices per frame once the clip has more than 16 frames.
	        "max_slice_nums": 1 if len(encoded_video) > 16 else 2,
	    }

	    response = model.chat(image=None, msgs=context, tokenizer=tokenizer, **params)

	    processing_time = time.perf_counter() - start_time

	    return f"Analysis Result:\n{response}\n\nProcessing Time: {processing_time:.2f} seconds"

	# Gradio front end: prompt and video inputs on the left, result on the right,
	# and a button that runs analyze_video.
	# NOTE(review): nesting below is reconstructed from the blank-line layout of
	# the source; confirm the button placement against the deployed app.
	with gr.Blocks() as demo:
	    gr.Markdown("# Video Analyzer")

	    with gr.Row():
	        with gr.Column():
	            prompt_input = gr.Textbox(label="Prompt")
	            video_input = gr.Video(label="Upload Video")
	        with gr.Column():
	            output = gr.Textbox(label="Analysis Result and Processing Time")

	    analyze_button = gr.Button("Analyze Video")
	    analyze_button.click(
	        fn=analyze_video,
	        inputs=[prompt_input, video_input],
	        outputs=output,
	    )

	demo.launch()