Spaces:

amezi
/

blip

Paused

blip / src /embedder.py

more minor bugs fixed

2e8783e 5 months ago

1.19 kB

	import torch
	import numpy as np
	from transformers import AutoProcessor, AutoModel
	import decord

	class InternVLEmbedder:
	    """Produce L2-normalised embedding vectors for videos and text.

	    The model and processor are loaded once in ``__init__`` through
	    ``transformers`` with ``trust_remote_code=True`` and reused by every
	    ``embed_*`` call.

	    NOTE(review): ``get_video_features`` / ``get_text_features`` are expected
	    to be provided by the checkpoint's remote code -- confirm that the
	    selected checkpoint actually exposes them.
	    """

	    # Checkpoint used when the caller does not pass ``model_name``.
	    MODEL_NAME = "OpenGVLab/InternVL2_5-1B-MPO"
	    # Number of frames sampled per video when ``num_frames`` is not given.
	    NUM_FRAMES = 8

	    def __init__(self, model_name=MODEL_NAME, device=None):
	        """Load the model and its processor.

	        Args:
	            model_name: Checkpoint identifier handed to ``from_pretrained``.
	            device: Torch device string. When ``None``, ``"cuda"`` is used if
	                available, otherwise ``"cpu"``.
	        """
	        if device is None:
	            device = "cuda" if torch.cuda.is_available() else "cpu"
	        self.device = device
	        self.model = AutoModel.from_pretrained(
	            model_name, trust_remote_code=True
	        ).to(self.device)
	        self.processor = AutoProcessor.from_pretrained(
	            model_name, trust_remote_code=True
	        )

	    @staticmethod
	    def _l2_normalize(vector):
	        """Return ``vector`` scaled to unit L2 norm.

	        A vector whose norm is zero is returned unchanged, because dividing
	        by zero would fill the embedding with NaN values.
	        """
	        norm = np.linalg.norm(vector)
	        if norm == 0:
	            return vector
	        return vector / norm

	    def embed_video(self, video_path, num_frames=NUM_FRAMES):
	        """Embed a video from ``num_frames`` evenly spaced frames.

	        Args:
	            video_path: Path passed to ``decord.VideoReader``.
	            num_frames: How many frames to sample between the first and the
	                last frame (inclusive). Must be at least 1.

	        Returns:
	            The L2-normalised feature array returned by the model.

	        Raises:
	            ValueError: If ``num_frames`` is smaller than 1 or the video
	                contains no frames.
	        """
	        # Validate before any I/O so bad arguments fail fast.
	        if num_frames < 1:
	            raise ValueError(f"num_frames must be >= 1, got {num_frames}")

	        vr = decord.VideoReader(video_path)
	        total_frames = len(vr)
	        if total_frames == 0:
	            raise ValueError(f"video contains no frames: {video_path!r}")

	        # Evenly spaced indices; when the video has fewer frames than
	        # requested, truncation to int makes some indices repeat.
	        indices = np.linspace(0, total_frames - 1, num_frames).astype(int)
	        frames = np.stack([vr[int(i)].asnumpy() for i in indices])

	        # Move the last axis to position 1 and add a leading batch axis.
	        # NOTE(review): assumes decoded frames are (H, W, C); the raw pixel
	        # values are passed on without processor normalisation -- confirm
	        # this is what the model expects.
	        tensor = (
	            torch.from_numpy(frames)
	            .permute(0, 3, 1, 2)
	            .unsqueeze(0)
	            .to(self.device)
	        )

	        with torch.no_grad():
	            features = self.model.get_video_features(tensor)
	        video_vector = features.squeeze(0).cpu().numpy()

	        return self._l2_normalize(video_vector)

	    def embed_text(self, text):
	        """Embed a single text string.

	        Args:
	            text: The string to embed; it is wrapped in a one-element batch
	                before being handed to the processor.

	        Returns:
	            The L2-normalised feature array returned by the model.
	        """
	        inputs = self.processor(text=[text], return_tensors="pt").to(self.device)

	        with torch.no_grad():
	            features = self.model.get_text_features(**inputs)
	        text_vector = features.squeeze(0).cpu().numpy()

	        return self._l2_normalize(text_vector)