sunana committed on
Commit
2ec5927
·
1 Parent(s): a9880fc

Upload 6 files

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. app.py +102 -13
  3. example_1.mp4 +0 -0
  4. example_2.mp4 +0 -0
  5. example_3.mp4 +3 -0
  6. example_4.mp4 +0 -0
  7. example_5.mp4 +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ example_3.mp4 filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,15 +1,104 @@
1
- import numpy as np
2
  import gradio as gr
 
 
 
 
 
 
 
3
 
4
- def sepia(input_img):
5
- sepia_filter = np.array([
6
- [0.393, 0.769, 0.189],
7
- [0.349, 0.686, 0.168],
8
- [0.272, 0.534, 0.131]
9
- ])
10
- sepia_img = input_img.dot(sepia_filter.T)
11
- sepia_img /= sepia_img.max()
12
- return sepia_img
13
-
14
- demo = gr.Interface(sepia, gr.Image(), "image")
15
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ import math
4
+ import cv2
5
+ import os
6
+ import sys
7
+ import FFV1MT_MS
8
+ import flow_tools
9
 
10
# Build the network and select the compute device (GPU when one is available).
model = FFV1MT_MS.FFV1DNN()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print('Number of parameters: {}'.format(model.num_parameters()))
model.to(device)
# map_location remaps the stored tensors onto the active device. Without it,
# torch.load raises on a CPU-only host when the checkpoint holds CUDA tensors.
model_dict = torch.load('Model_example.pth.tar', map_location=device)['state_dict']
# Load the pretrained weights; strict=True makes load_state_dict raise when the
# checkpoint keys do not match the model's keys exactly.
model.load_state_dict(model_dict, strict=True)
# Switch to inference mode (affects layers such as dropout / batch norm).
model.eval()
19
+
20
+
21
# Number of leading frames handed to the model, and the longest image side
# (in pixels) allowed before the frames are downscaled.
_NUM_FRAMES = 11
_MAX_SIDE = 832


def process_images(videos, x, y):
    """Run the model's visualisation pass on the first frames of a video.

    Args:
        videos: Path of the video file; passed straight to ``cv2.VideoCapture``.
        x: Forwarded to ``model.forward_viz`` as ``x`` (the UI labels it as the
            X location of the attention visualizer).
        y: Forwarded to ``model.forward_viz`` as ``y`` (the UI labels it as the
            Y location of the attention visualizer).

    Returns:
        ``[flow, activation, attention]`` taken from the ``forward_viz`` result,
        or ``None`` (after printing a message) when fewer than 11 frames could
        be read from the video.
    """
    cap = cv2.VideoCapture(videos)
    images = []
    try:
        # Only the first _NUM_FRAMES frames are used, so stop decoding as soon
        # as we have them instead of loading the whole video into memory.
        while len(images) < _NUM_FRAMES:
            ret, frame = cap.read()
            if not ret:
                break
            images.append(frame)
    finally:
        # Always free the decoder / file handle, even if reading fails.
        cap.release()

    if len(images) < _NUM_FRAMES:
        print('video is too short')
        return None

    # Convert each H x W x C frame into a (1, C, H, W) float tensor on the
    # model's device, scaled by 1/255.
    # NOTE(review): OpenCV decodes frames in BGR channel order and no colour
    # conversion (to RGB or grayscale) is done here - confirm that this is
    # what the model expects.
    images = [torch.from_numpy(img).permute(2, 0, 1).float().to(device).unsqueeze(0) / 255.0 for img in images]

    # If the longest side exceeds _MAX_SIDE, shrink every frame by the same
    # factor so the aspect ratio is preserved.
    max_size = max(images[0].shape[2], images[0].shape[3])
    if max_size > _MAX_SIDE:
        ratio = _MAX_SIDE / max_size
        images = [torch.nn.functional.interpolate(img, scale_factor=ratio, mode='bicubic', align_corners=True)
                  for img in images]

    result = model.forward_viz(images, layer=7, x=x, y=y)
    flow = result['flow']
    attention = result['attention']
    activation = result['activation']

    # Order matches the three output components of the interface:
    # flow field, stage-I activation, stage-II attention map.
    return [flow, activation, attention]
53
+
54
+
55
# Page title shown above the demo.
title = "Modelling Human Visual Motion Processing with Trainable Motion Energy Sensing and a Self-attention Network "

# Markdown shown under the title: introduction, environment requirements,
# links to the paper / talk / conference page, and usage notes for the demo.
# Adjacent string literals are concatenated by the parser, so any separating
# space or newline must be written explicitly inside the literals.
description = (
    "## Introduction(^_^)\n"
    " The intersection of cognitive neuroscience and computer vision offers exciting advancements in "
    "how machines perceive motion. Our research bridges the gap between these fields by proposing a novel "
    "image-computable model that aligns with human motion perception mechanisms. By integrating trainable"
    " motion energy sensing with recurrent self-attention networks, we can simulate the complex motion "
    "processing of the human visual cortex, particularly the V1-MT pathway. Our model not only parallels"
    " physiological responses in V1 and MT neurons but also replicates human psychophysical responses "
    "to dynamic stimuli. \n\n\n"
    "![](https://drive.google.com/uc?id=10PcKzQ9X1nsXKUi8OPR0jN_ZsjlCAV47) \n"
    "## Environment Configuration \n"
    "To run our model, the basic environment configuration is required:\n"
    '- Python 3.8 or higher \n'
    '- PyTorch 2.0 \n'
    '- CUDA Toolkit 11.x (for GPU acceleration)\n'
    '- opencv-python \n'
    '- Imageio \n'
    '- Matplotlib \n\n'
    "## Preprint Paper \n"
    "The paper is available at [arXiv](https://arxiv.org/abs/2305.09156) \n"
    "## Video Presentation \n"
    "The video presentation is available at [Video Record](https://recorder-v3.slideslive.com/?share=85662&s=6afe157c-e764-4e3c-9302-2c6dd6887db1/). \n"
    "## Conference Website \n"
    "The project is presented at [NeurIPS 2023](https://neurips.cc/virtual/2023/poster/70202). \n"
    "## Below is the interactive demo of our model. You can select the videos examples below or upload your own videos. The model outputs the motion flow field, the activation of the first stage, and the attention map of the second stage. "
    "We also provide two sliders to adjust the location of the attention visualizer. \n"
    " **Note**: The demo is running on CPU, so it may take a while to process the video. \n"
)
82
+
83
# Bundled example inputs: [video file, X slider value, Y slider value].
examples = [
    ["example_1.mp4", 62, 56],
    ["example_2.mp4", 59, 55],
    ["example_3.mp4", 50, 50],
    ["example_4.mp4", 50, 50],
    ["example_5.mp4", 39, 72],
]

# Markdown footer rendered below the demo (passed as ``article``).
md = (
    "![](https://drive.google.com/uc?id=1WBqYsKRwn_78A72MJBrk643l3-gfAssP) \n"
    "## Author \n"
    "This project page is developed by Zitang Sun (zitangsun96 @ gmail.com)\n"
    "## LICENSE \n"
    "This project is licensed under the terms of the MIT license. \n"
)

# One video plus two sliders go in; three images come out, in the same order
# as the list returned by process_images.
iface = gr.Interface(
    fn=process_images,
    inputs=[
        gr.Video(label="Upload video or use the example images below"),
        gr.Slider(0, 100, label='X location of attention visualizer'),
        gr.Slider(0, 100, label='Y location of attention visualizer'),
    ],
    outputs=[
        gr.Image(type="numpy", label="Motion flow field"),
        gr.Image(type="numpy", label="Activation of Stage I"),
        gr.Image(type="numpy", label="Attention map of Stage II"),
    ],
    title=title,
    description=description,
    article=md,
    examples=examples,
)

iface.launch(debug=True, share=True)
example_1.mp4 ADDED
Binary file (614 kB). View file
 
example_2.mp4 ADDED
Binary file (190 kB). View file
 
example_3.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e3ae608d0b88e6445a1fd4e98af798627f28344ef4a5380ed9d627887330c00
3
+ size 1065191
example_4.mp4 ADDED
Binary file (47.7 kB). View file
 
example_5.mp4 ADDED
Binary file (169 kB). View file