"""Streamlit app: show an image fetched from a URL and its DINO attention heat maps."""

import utils
from huggingface_hub.keras_mixin import from_pretrained_keras
from PIL import Image
import streamlit as st
import tensorflow as tf

# Inputs
st.title("Input your image")
image_url = st.text_input(
    label="URL of image",
    value="https://dl.fbaipublicfiles.com/dino/img.png",
    placeholder="https://your-favourite-image.png",
)

# The text box can be cleared by the user. With no URL there is nothing to
# download or feed to the model, so end this script run with a hint instead of
# passing an empty string to the loader.
if not image_url.strip():
    st.warning("Please enter the URL of an image.")
    st.stop()

# Outputs
st.title("Original Image from URL")

# Load the image twice over: once as-is for display, and once preprocessed
# (with normalization) as the model input.
image, preprocessed_image = utils.load_image_from_url(
    image_url,
    model_type="dino",
)
st.image(image, caption="Original Image")

st.title("Attention Heat Maps")

# Load the DINO model.
# NOTE(review): this runs on every Streamlit rerun; consider caching the loaded
# model if the installed Streamlit version offers a resource cache.
dino = from_pretrained_keras("probing-vits/vit-dino-base16")

# Get the attention scores; the first output of predict() is not used here.
_, attention_score_dict = dino.predict(preprocessed_image)

# De-normalize the image for visual clarity: undo the mean/std scaling, bring
# the values back to the [0, 1] range and clip anything that falls outside it.
# (The "in1k" statistics are presumably the ImageNet-1k channel mean/std,
# expressed on a 0-255 scale -- confirm against utils.)
in1k_mean = tf.constant([0.485 * 255, 0.456 * 255, 0.406 * 255])
in1k_std = tf.constant([0.229 * 255, 0.224 * 255, 0.225 * 255])
preprocessed_img_orig = (preprocessed_image * in1k_std) + in1k_mean
preprocessed_img_orig = preprocessed_img_orig / 255.
preprocessed_img_orig = tf.clip_by_value(preprocessed_img_orig, 0.0, 1.0).numpy()

attentions = utils.attention_heatmap(
    attention_score_dict=attention_score_dict,
    image=preprocessed_img_orig,
)

# Presumably utils.plot writes the figure to "heat_map.png", which is read back
# below -- verify against utils.
utils.plot(attentions=attentions, image=preprocessed_img_orig)

# Show the attention maps. A separate name is used so the original image
# loaded above is not overwritten.
heat_map_image = Image.open("heat_map.png")
st.image(heat_map_image, caption="Attention Heat Maps")