Spaces:

Andy1621
/

uniformer_image_demo

Runtime error

File size: 3,068 Bytes

e98dbd2
 
 
 
 
a29447f
d969a42
e98dbd2
 
856cff1
e98dbd2
 
 
 
856cff1
ebdd576
e98dbd2
 
856cff1
 
a29447f
e98dbd2
 
 
 
 
 
 
 
 
 
ece2f26
e98dbd2
 
 
 
 
 
 
 
 
 
 
 
ece2f26
3def095
e98dbd2
c9409e6
ece2f26
ffe2dca
ece2f26
 
bd95e10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e98dbd2
bd95e10
 
e98dbd2
bd95e10

import os

import torch
import torch.nn.functional as F
import torchvision.transforms as T
from uniformer import uniformer_small
from imagenet_class_index import imagenet_classnames

import gradio as gr
from huggingface_hub import hf_hub_download

# Device used for inference; change to "cuda" to run the model on a GPU.
device = "cpu"

# Download the `uniformer_small_in1k.pth` checkpoint from the Hugging Face Hub.
model_path = hf_hub_download(
    repo_id="Andy1621/uniformer",
    filename="uniformer_small_in1k.pth",
)

# Deserialize the checkpoint onto the CPU; the weights live under the "model" key.
state_dict = torch.load(model_path, map_location="cpu")

# Build the UniFormer-S architecture and restore the pretrained weights.
model = uniformer_small()
model.load_state_dict(state_dict["model"])

# Place the model on the target device and switch it to eval mode.
model = model.to(device).eval()

# Lookup table from class id to class name; the name is the second element
# of each entry in `imagenet_classnames`.
imagenet_id_to_classname = {
    class_id: entry[1] for class_id, entry in imagenet_classnames.items()
}


def inference(img):
    """Classify an image with the module-level UniFormer model.

    Args:
        img: The image to classify. The Gradio input component that feeds
            this function is configured with ``type='pil'``, so a PIL image
            is expected.

    Returns:
        A dict mapping ImageNet class names (looked up through
        ``imagenet_id_to_classname``) to their softmax probabilities as
        Python floats.
    """
    image_transform = T.Compose(
        [
            T.Resize(224),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )
    image = image_transform(img)

    # The model expects inputs of shape: B x C x H x W.
    # Move the batch to the model's device so that switching `device` to
    # "cuda" does not fail with a CPU/GPU tensor mismatch.
    image = image.unsqueeze(0).to(device)

    # Pure inference: disable autograd so no graph is built per request.
    with torch.no_grad():
        logits = model(image)
        probabilities = F.softmax(logits, dim=1).flatten()

    # One bulk conversion to Python floats instead of one `float()` per class.
    return {
        imagenet_id_to_classname[str(class_idx)]: probability
        for class_idx, probability in enumerate(probabilities.tolist())
    }

def set_example_image(example: list) -> dict:
    """Build the image-component update for a selected example.

    Args:
        example: The selected dataset sample; only its first element is used.

    Returns:
        The result of ``gr.Image.update`` with that element as the new value.
    """
    selected = example[0]
    return gr.Image.update(value=selected)


# Assemble the demo page: header, input/output panel, examples, and footer.
with gr.Blocks() as demo:
    # Title and short usage instructions.
    gr.Markdown(
        """
        # UniFormer-S
        Gradio demo for <a href='https://github.com/Sense-X/UniFormer' target='_blank'>UniFormer</a>: To use it, simply upload your image, or click one of the examples to load them. Read more at the links below.
        """
    )

    with gr.Box():
        with gr.Row():
            # Left column: image input with its submit button underneath.
            with gr.Column():
                with gr.Row():
                    input_image = gr.Image(label="Input Image", type="pil")
                with gr.Row():
                    submit_button = gr.Button("Submit")
            # Right column: the five highest-scoring classes.
            with gr.Column():
                label = gr.Label(num_top_classes=5)
        # Clickable example images bound to the input component.
        with gr.Row():
            example_images = gr.Dataset(
                components=[input_image],
                samples=[["library.jpeg"], ["cat.png"], ["dog.png"], ["panda.png"]],
            )

    # Footer with links to the paper and the code repository.
    gr.Markdown(
        """
        <p style='text-align: center'><a href='https://arxiv.org/abs/2201.09450' target='_blank'>UniFormer: Unifying Convolution and Self-attention for Visual Recognition</a> | <a href='https://github.com/Sense-X/UniFormer' target='_blank'>Github Repo</a></p>
        """
    )

    # Event wiring: classify on submit; load an example into the input on click.
    submit_button.click(fn=inference, inputs=input_image, outputs=label)
    example_images.click(
        fn=set_example_image,
        inputs=example_images,
        outputs=example_images.components,
    )

demo.launch(enable_queue=True)