Spaces:

Vidushee
/

AdBind

Sleeping

App Files Files Community

AdBind / Zocket_ImageBind.py

Vidushee

Update Zocket_ImageBind.py

c8b2ee9 verified 7 months ago

raw

history blame

1.68 kB

	from imagebind import data
	import torch
	from imagebind.models import imagebind_model
	from imagebind.models.imagebind_model import ModalityType
	import os

	from PIL import Image
	import streamlit as st
	import tempfile

	device = "cuda:0" if torch.cuda.is_available() else "cpu"

	# Instantiate model
	model = imagebind_model.imagebind_huge(pretrained=True)
	model.eval()
	model.to(device)

	text_list = ["An Advertisement(branding, text, promotions, lifestyle depiction, contextual cues, and visual composition)","Not an Advertisement"]
	image_paths = []

	text = ['Advertisement Creative(Contains Text)', 'Not an Advertisement Creative(Contains No Text)', 'Simple Product Image and not an Advertisement)']



	st.title("Advertisement Detection using CLIP")

	# Upload image
	uploaded_image = st.file_uploader("Choose an image...", type= ["png", "jpg", "jpeg"])


	if uploaded_image is not None:
	temp_dir = tempfile.mkdtemp()
	path = os.path.join(temp_dir, uploaded_image.name)
	with open(path, "wb") as f:
	f.write(uploaded_image.getvalue())

	image_paths.append(path)
	image = Image.open(uploaded_image)
	st.image(image, caption="Uploaded Image.", use_column_width=True)


	inputs = {
	ModalityType.TEXT: data.load_and_transform_text(text_list, device),
	ModalityType.VISION: data.load_and_transform_vision_data(image_paths, device),
	}

	with torch.no_grad():
	embeddings = model(inputs)

	print(
	"Vision x Text: ",
	torch.softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, dim=-1),
	)

	st.write(torch.softmax(embeddings[ModalityType.VISION] @ embeddings[ModalityType.TEXT].T, dim=-1))