Andy1621 committed on
Commit
e98dbd2
1 Parent(s): e9e2587

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from typing import List
4
+
5
+ import torch
6
+ import torch.nn.functional as F
7
+ import torchvision.transforms as T
8
+ from PIL import Image
9
+ from torchvision.transforms._transforms_video import NormalizeVideo
10
+ from uniformer import uniformer_small, uniformer_base, uniformer_small_plus, uniformer_base_ls
11
+
12
+ import gradio as gr
13
+
14
+ # Device on which to run the model
15
+ # Set to "cuda" to load on GPU
16
+ device = "cpu"
17
+ os.system("wget https://huggingface.co/Andy1621/uniformer/resolve/main/uniformer_small_in1k.pth -O uniformer_small_in1k.pth")  # /resolve/ serves the raw file; /blob/ is the HTML viewer page
18
+ # Pick a pretrained model
19
+ model = uniformer_small()
20
+ state_dict = torch.load('uniformer_small_in1k.pth', map_location='cpu')  # load on CPU first; moved to `device` below
21
+ model.load_state_dict(state_dict)
22
+
23
+ # Set to eval mode and move to desired device
24
+ model = model.to(device)
25
+ model = model.eval()
26
+
27
+ os.system("wget https://huggingface.co/Andy1621/uniformer/resolve/main/imagenet_class_index.json -O imagenet_class_index.json")  # /resolve/ returns the raw JSON; /blob/ returns an HTML page
28
+
29
+ with open("imagenet_class_index.json", "r") as f:
30
+     imagenet_classnames = json.load(f)
31
+
32
+ # Create an id to label name mapping (keys stay strings, as loaded from the JSON)
33
+ imagenet_id_to_classname = {}
34
+ for k, v in imagenet_classnames.items():
35
+     imagenet_id_to_classname[k] = v[1]  # presumably each value is [wnid, label]; keep the label -- verify against the JSON
36
+
37
+ os.system("wget https://upload.wikimedia.org/wikipedia/commons/thumb/c/c5/13-11-02-olb-by-RalfR-03.jpg/800px-13-11-02-olb-by-RalfR-03.jpg -O library.jpg")  # example image offered in the demo UI
38
+
39
+ def inference(img):
39
+     """Return a string listing the top-5 ImageNet labels the model predicts for the PIL image `img`."""
40
+     image_transform = T.Compose(
41
+         [
42
+             T.Resize(224),
43
+             T.CenterCrop(224),
44
+             T.ToTensor(),
45
+             T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
46
+         ]
47
+     )
48
+     image = image_transform(img)
49
+
50
+     # The model expects inputs of shape: B x C x T x H x W
51
+     image = image[None, :, None, ...].to(device)  # add batch and time axes; the input must live on the same device as the model
52
+     with torch.no_grad():  # inference only: skip autograd bookkeeping
53
+         prediction = model(image, input_type="image")  # NOTE(review): input_type= and the extra T axis look inherited from a video-model demo -- confirm uniformer_small's forward accepts them
54
+         prediction = F.softmax(prediction, dim=1)
55
+     pred_classes = prediction.topk(k=5).indices
56
+
57
+     pred_class_names = [imagenet_id_to_classname[str(i.item())] for i in pred_classes[0]]  # mapping keys are strings
58
+     return "Top 5 predicted labels: %s" % ", ".join(pred_class_names)
60
+
61
+ inputs = gr.inputs.Image(type="pil")  # the uploaded image reaches inference() as a PIL image
62
+ outputs = gr.outputs.Textbox(label="Output")  # inference() returns a single string
63
+
64
+ title = "UniFormer-S"
65
+
66
+ description = "Gradio demo for UniFormer: To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
67
+
68
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.09450' target='_blank'>UniFormer: Unifying Convolution and Self-attention for Visual Recognition</a> | <a href='https://github.com/Sense-X/UniFormer' target='_blank'>Github Repo</a></p>"
69
+
70
+ demo = gr.Interface(fn=inference, inputs=inputs, outputs=outputs, title=title, description=description, article=article, examples=[["library.jpg"]])
71
+ demo.launch(enable_queue=True, cache_examples=True)