innat committed
Commit 8ffd571
1 Parent(s): 29eaf44

Update app.py

Files changed (1)
  1. app.py +87 -4
app.py CHANGED
@@ -1,7 +1,90 @@
 import gradio as gr
 
-def greet(name):
-    return "Hello " + name + "!!"
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
 import gradio as gr
+import numpy as np
+import zipfile
+import imageio
 
+import tensorflow as tf
+from tensorflow import keras
 
+from utils import read_video, frame_sampling
+from utils import num_frames, patch_size, input_size
+from labels import K400_label_map, SSv2_label_map
+
+
+LABEL_MAPS = {
+    'K400': K400_label_map,
+    'SSv2': SSv2_label_map,
+}
+
+ALL_MODELS = [
+    'TFVideoSwinT_K400_IN1K_P244_W877_32x224',
+    'TFVideoSwinB_SSV2_K400_P244_W1677_32x224',
+]
+
+sample_example = [
+    ["examples/k400.mp4", ALL_MODELS[0]],
+    ["examples/ssv2.mp4", ALL_MODELS[1]],
+]
+
+
+def get_model(model_type):
+    model_path = keras.utils.get_file(
+        origin=f'https://github.com/innat/VideoSwin/releases/download/v1.1/{model_type}.zip',
+    )
+    with zipfile.ZipFile(model_path, 'r') as zip_ref:
+        zip_ref.extractall('./')
+
+    model = keras.models.load_model(model_type)
+
+    if 'K400' in model_type:
+        data_type = 'K400'
+    elif 'SSv2' in model_type:
+        data_type = 'SSv2'
+
+    label_map = LABEL_MAPS.get(data_type)
+    label_map = {v: k for k, v in label_map.items()}
+
+    return model, label_map
+
+
+def inference(video_file, model_type):
+    # get sample data
+    container = read_video(video_file)
+    frames = frame_sampling(container, num_frames=num_frames)
+
+    # get models
+    model, label_map = get_model(model_type)
+    model.trainable = False
+
+    # inference on model
+    outputs = model(frames[None, ...], training=False)
+    probabilities = tf.nn.softmax(outputs).numpy().squeeze(0)
+    confidences = {
+        label_map[i]: float(probabilities[i]) for i in np.argsort(probabilities)[::-1]
+    }
+    return confidences
+
+
+def main():
+    iface = gr.Interface(
+        fn=inference,
+        inputs=[
+            gr.Video(type="file", label="Input Video"),
+            gr.Dropdown(
+                choices=ALL_MODELS,
+                default="TFVideoSwinT_K400_IN1K_P244_W877_32x224",
+                label="Model"
+            )
+        ],
+        outputs=[
+            gr.Label(num_top_classes=3, label='scores'),
+        ],
+        examples=sample_example,
+        title="VideoSwin: Video Swin Transformer",
+        description="Keras reimplementation of <a href='https://github.com/innat/VideoSwin'>VideoSwin</a> is presented here."
+    )
+
+    iface.launch()
+
+if __name__ == '__main__':
+    main()
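
Note: the new app.py imports read_video, frame_sampling, num_frames, patch_size, and input_size from a utils module that is not part of this commit. The snippet below is only a minimal sketch of what those helpers might look like, assuming 32 frames and a 224x224 input resolution (inferred from the *_32x224 model names) and simple [0, 1] pixel scaling; the Space's actual utils.py (and its patch_size constant, unused here) may differ.

# Hypothetical sketch of the utils helpers; names and preprocessing are
# assumptions, not the code shipped with this commit.
import imageio
import numpy as np
import tensorflow as tf

num_frames = 32     # assumed from the *_32x224 model names
input_size = 224    # assumed from the *_32x224 model names


def read_video(video_file):
    # Decode every frame of the clip into one (T, H, W, 3) uint8 array.
    reader = imageio.get_reader(video_file)
    frames = np.stack([np.asarray(frame) for frame in reader])
    reader.close()
    return frames


def frame_sampling(container, num_frames):
    # Uniformly sample num_frames frames, resize them to the model's input
    # resolution (tf.image.resize returns float32), and scale to [0, 1].
    indices = np.linspace(0, len(container) - 1, num_frames).astype(int)
    frames = tf.image.resize(container[indices], (input_size, input_size))
    return frames / 255.0

With helpers shaped like this, inference() would pass frames[None, ...], i.e. a (1, num_frames, input_size, input_size, 3) batch, to the loaded VideoSwin model.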