Spaces:
Build error
Build error
oneScotch
committed on
Commit
·
bcd4eea
1
Parent(s):
c24cd83
fix minor issue when changing to python 3.9
Browse files
app.py
CHANGED
@@ -1,134 +1,134 @@
|
|
1 |
-
import os
|
2 |
-
import sys
|
3 |
-
import os.path as osp
|
4 |
-
from pathlib import Path
|
5 |
-
import cv2
|
6 |
-
import gradio as gr
|
7 |
-
import torch
|
8 |
-
import math
|
9 |
-
import spaces
|
10 |
-
|
11 |
-
try:
|
12 |
-
import mmpose
|
13 |
-
except:
|
14 |
-
os.system('pip install /home/user/app/main/transformer_utils')
|
15 |
-
|
16 |
-
os.system('cp -rf /home/user/app/assets/conversions.py /home/user/.pyenv/versions/3.
|
17 |
-
DEFAULT_MODEL='smpler_x_h32'
|
18 |
-
OUT_FOLDER = '/home/user/app/demo_out'
|
19 |
-
os.makedirs(OUT_FOLDER, exist_ok=True)
|
20 |
-
num_gpus = 1 if torch.cuda.is_available() else -1
|
21 |
-
print("!!!", torch.cuda.is_available())
|
22 |
-
print(torch.cuda.device_count())
|
23 |
-
print(torch.version.cuda)
|
24 |
-
index = torch.cuda.current_device()
|
25 |
-
print(index)
|
26 |
-
print(torch.cuda.get_device_name(index))
|
27 |
-
from main.inference import Inferer
|
28 |
-
inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER)
|
29 |
-
|
30 |
-
@spaces.GPU(enable_queue=True)
|
31 |
-
def infer(video_input, in_threshold=0.5, num_people="Single person", render_mesh=False):
|
32 |
-
os.system(f'rm -rf {OUT_FOLDER}/*')
|
33 |
-
multi_person = False if (num_people == "Single person") else True
|
34 |
-
cap = cv2.VideoCapture(video_input)
|
35 |
-
fps = math.ceil(cap.get(5))
|
36 |
-
width = int(cap.get(3))
|
37 |
-
height = int(cap.get(4))
|
38 |
-
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
39 |
-
video_path = osp.join(OUT_FOLDER, f'out.m4v')
|
40 |
-
final_video_path = osp.join(OUT_FOLDER, f'out.mp4')
|
41 |
-
video_output = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
|
42 |
-
success = 1
|
43 |
-
frame = 0
|
44 |
-
while success:
|
45 |
-
success, original_img = cap.read()
|
46 |
-
if not success:
|
47 |
-
break
|
48 |
-
frame += 1
|
49 |
-
img, mesh_paths, smplx_paths = inferer.infer(original_img, in_threshold, frame, multi_person, not(render_mesh))
|
50 |
-
video_output.write(img)
|
51 |
-
yield img, None, None, None
|
52 |
-
cap.release()
|
53 |
-
video_output.release()
|
54 |
-
cv2.destroyAllWindows()
|
55 |
-
os.system(f'ffmpeg -i {video_path} -c copy {final_video_path}')
|
56 |
-
|
57 |
-
#Compress mesh and smplx files
|
58 |
-
save_path_mesh = os.path.join(OUT_FOLDER, 'mesh')
|
59 |
-
save_mesh_file = os.path.join(OUT_FOLDER, 'mesh.zip')
|
60 |
-
os.makedirs(save_path_mesh, exist_ok= True)
|
61 |
-
save_path_smplx = os.path.join(OUT_FOLDER, 'smplx')
|
62 |
-
save_smplx_file = os.path.join(OUT_FOLDER, 'smplx.zip')
|
63 |
-
os.makedirs(save_path_smplx, exist_ok= True)
|
64 |
-
os.system(f'zip -r {save_mesh_file} {save_path_mesh}')
|
65 |
-
os.system(f'zip -r {save_smplx_file} {save_path_smplx}')
|
66 |
-
yield img, video_path, save_mesh_file, save_smplx_file
|
67 |
-
|
68 |
-
TITLE = '''<h1 align="center">SMPLer-X: Scaling Up Expressive Human Pose and Shape Estimation</h1>'''
|
69 |
-
VIDEO = '''
|
70 |
-
<center><iframe width="960" height="540"
|
71 |
-
src="https://www.youtube.com/embed/DepTqbPpVzY?si=qSeQuX-bgm_rON7E"title="SMPLer-X: Scaling Up Expressive Human Pose and Shape Estimation" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen>
|
72 |
-
</iframe>
|
73 |
-
</center><br>'''
|
74 |
-
DESCRIPTION = '''
|
75 |
-
<b>Official Gradio demo</b> for <a href="https://caizhongang.com/projects/SMPLer-X/"><b>SMPLer-X: Scaling Up Expressive Human Pose and Shape Estimation</b></a>.<br>
|
76 |
-
<p>
|
77 |
-
Note: You can drop a video at the panel (or select one of the examples)
|
78 |
-
to obtain the 3D parametric reconstructions of the detected humans.
|
79 |
-
</p>
|
80 |
-
'''
|
81 |
-
|
82 |
-
with gr.Blocks(title="SMPLer-X", css=".gradio-container") as demo:
|
83 |
-
|
84 |
-
gr.Markdown(TITLE)
|
85 |
-
gr.HTML(VIDEO)
|
86 |
-
gr.Markdown(DESCRIPTION)
|
87 |
-
|
88 |
-
with gr.Row():
|
89 |
-
with gr.Column():
|
90 |
-
video_input = gr.Video(label="Input video", elem_classes="video")
|
91 |
-
threshold = gr.Slider(0, 1.0, value=0.5, label='BBox detection threshold')
|
92 |
-
with gr.Column(scale=2):
|
93 |
-
num_people = gr.Radio(
|
94 |
-
choices=["Single person", "Multiple people"],
|
95 |
-
value="Single person",
|
96 |
-
label="Number of people",
|
97 |
-
info="Choose how many people are there in the video. Choose 'single person' for faster inference.",
|
98 |
-
interactive=True,
|
99 |
-
scale=1,)
|
100 |
-
gr.HTML("""<br/>""")
|
101 |
-
mesh_as_vertices = gr.Checkbox(
|
102 |
-
label="Render as mesh",
|
103 |
-
info="By default, the estimated SMPL-X parameters are rendered as vertices for faster visualization. Check this option if you want to visualize meshes instead.",
|
104 |
-
interactive=True,
|
105 |
-
scale=1,)
|
106 |
-
|
107 |
-
send_button = gr.Button("Infer")
|
108 |
-
gr.HTML("""<br/>""")
|
109 |
-
|
110 |
-
with gr.Row():
|
111 |
-
with gr.Column():
|
112 |
-
processed_frames = gr.Image(label="Last processed frame")
|
113 |
-
video_output = gr.Video(elem_classes="video")
|
114 |
-
with gr.Column():
|
115 |
-
meshes_output = gr.File(label="3D meshes")
|
116 |
-
smplx_output = gr.File(label= "SMPL-X models")
|
117 |
-
# example_images = gr.Examples([])
|
118 |
-
send_button.click(fn=infer, inputs=[video_input, threshold, num_people, mesh_as_vertices], outputs=[processed_frames, video_output, meshes_output, smplx_output])
|
119 |
-
# with gr.Row():
|
120 |
-
example_videos = gr.Examples([
|
121 |
-
['/home/user/app/assets/01.mp4'],
|
122 |
-
['/home/user/app/assets/02.mp4'],
|
123 |
-
['/home/user/app/assets/03.mp4'],
|
124 |
-
['/home/user/app/assets/04.mp4'],
|
125 |
-
['/home/user/app/assets/05.mp4'],
|
126 |
-
['/home/user/app/assets/06.mp4'],
|
127 |
-
['/home/user/app/assets/07.mp4'],
|
128 |
-
['/home/user/app/assets/08.mp4'],
|
129 |
-
['/home/user/app/assets/09.mp4'],
|
130 |
-
],
|
131 |
-
inputs=[video_input, 0.5])
|
132 |
-
|
133 |
-
#demo.queue()
|
134 |
-
demo.queue().launch(debug=True)
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import os.path as osp
|
4 |
+
from pathlib import Path
|
5 |
+
import cv2
|
6 |
+
import gradio as gr
|
7 |
+
import torch
|
8 |
+
import math
|
9 |
+
import spaces
|
10 |
+
|
11 |
+
# ---------------------------------------------------------------------------
# One-time environment setup, executed when the module is imported.
# ---------------------------------------------------------------------------
import importlib.util
import shutil

try:
    import mmpose  # noqa: F401  (imported only to probe availability)
except Exception:
    # Best effort: any failure to import mmpose triggers installation of the
    # bundled package. `Exception` (rather than a bare `except:`) keeps
    # KeyboardInterrupt / SystemExit from being swallowed.
    os.system('pip install /home/user/app/main/transformer_utils')

# Replace the installed torchgeometry `core/conversions.py` with the copy
# shipped in assets. The destination is resolved from wherever torchgeometry
# is actually installed, so it no longer breaks when the interpreter version
# changes; the previously hard-coded path is kept as a fallback.
_CONVERSIONS_SRC = '/home/user/app/assets/conversions.py'
_CONVERSIONS_FALLBACK_DST = '/home/user/.pyenv/versions/3.9.18/lib/python3.9/site-packages/torchgeometry/core/conversions.py'
try:
    # find_spec locates the package without importing it.
    _tg_spec = importlib.util.find_spec('torchgeometry')
except (ImportError, ValueError):
    _tg_spec = None
if _tg_spec is not None and _tg_spec.submodule_search_locations:
    _conversions_dst = osp.join(
        list(_tg_spec.submodule_search_locations)[0], 'core', 'conversions.py'
    )
else:
    _conversions_dst = _CONVERSIONS_FALLBACK_DST
try:
    shutil.copyfile(_CONVERSIONS_SRC, _conversions_dst)
except OSError as err:
    # The original `cp -rf` via os.system never aborted start-up on failure;
    # keep that behaviour but make the failure visible in the logs.
    print(f'Could not patch torchgeometry conversions.py: {err}')

DEFAULT_MODEL = 'smpler_x_h32'
OUT_FOLDER = '/home/user/app/demo_out'
os.makedirs(OUT_FOLDER, exist_ok=True)

# CUDA diagnostics. num_gpus is -1 when CUDA is unavailable.
_cuda_available = torch.cuda.is_available()
num_gpus = 1 if _cuda_available else -1
print("!!!", _cuda_available)
print(torch.cuda.device_count())
print(torch.version.cuda)
# current_device() / get_device_name() raise without CUDA, so only query them
# when a device is actually present; `index` is None otherwise.
index = torch.cuda.current_device() if _cuda_available else None
if index is not None:
    print(index)
    print(torch.cuda.get_device_name(index))

# Imported after the setup above has run.
from main.inference import Inferer
inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER)
|
29 |
+
|
30 |
+
@spaces.GPU(enable_queue=True)
def infer(video_input, in_threshold=0.5, num_people="Single person", render_mesh=False):
    """Run the inferer on every frame of a video and stream the results.

    This is a generator. For each decoded frame it yields
    ``(img, None, None, None)`` where ``img`` is the image returned by
    ``inferer.infer``. After the last frame it yields once more with
    ``(last_img, video_path, mesh_zip_path, smplx_zip_path)``.

    Args:
        video_input: Path of the input video (passed to ``cv2.VideoCapture``).
        in_threshold: Threshold forwarded unchanged to ``inferer.infer``.
        num_people: ``"Single person"`` disables multi-person mode; any other
            value enables it.
        render_mesh: Its negation is forwarded as the last argument of
            ``inferer.infer`` (presumably a "render as vertices" flag;
            confirm against ``Inferer.infer``).

    Raises:
        gr.Error: If no input video was provided.
    """
    # Function-scope imports keep this block self-contained.
    import glob
    import shutil
    import subprocess

    def _run(cmd):
        # Same best-effort semantics as os.system: a missing tool or a
        # non-zero exit status must not abort the request.
        try:
            return subprocess.run(cmd, check=False).returncode
        except OSError:
            return -1

    if not video_input:
        raise gr.Error("Please provide an input video.")

    # Empty the output folder left by a previous run. glob('*') skips
    # dot-files exactly like the shell glob in `rm -rf OUT_FOLDER/*` did.
    for stale in glob.glob(osp.join(OUT_FOLDER, '*')):
        try:
            if osp.isdir(stale) and not osp.islink(stale):
                shutil.rmtree(stale)
            else:
                os.remove(stale)
        except OSError:
            pass  # `rm -rf` ignored failures as well

    multi_person = num_people != "Single person"
    video_path = osp.join(OUT_FOLDER, 'out.m4v')
    final_video_path = osp.join(OUT_FOLDER, 'out.mp4')

    cap = cv2.VideoCapture(video_input)
    video_output = None
    img = None  # stays None when the video yields no frames
    frame = 0
    try:
        fps = math.ceil(cap.get(cv2.CAP_PROP_FPS))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_output = cv2.VideoWriter(video_path, fourcc, fps, (width, height))

        while True:
            success, original_img = cap.read()
            if not success:
                break
            frame += 1  # frame numbers passed to the inferer are 1-based
            img, _mesh_paths, _smplx_paths = inferer.infer(
                original_img, in_threshold, frame, multi_person, not render_mesh
            )
            video_output.write(img)
            yield img, None, None, None
    finally:
        # Runs on normal completion, on an inference error, and when the
        # generator is closed early, so the capture and writer never leak.
        cap.release()
        if video_output is not None:
            video_output.release()
    cv2.destroyAllWindows()

    # Remux the written stream into an .mp4 container; `-y` prevents ffmpeg
    # from blocking on an overwrite prompt.
    _run(['ffmpeg', '-y', '-i', video_path, '-c', 'copy', final_video_path])
    # Return the remuxed file when ffmpeg produced it, else the raw output.
    result_video = final_video_path if osp.exists(final_video_path) else video_path

    # Compress mesh and smplx files
    save_path_mesh = os.path.join(OUT_FOLDER, 'mesh')
    save_mesh_file = os.path.join(OUT_FOLDER, 'mesh.zip')
    os.makedirs(save_path_mesh, exist_ok=True)
    save_path_smplx = os.path.join(OUT_FOLDER, 'smplx')
    save_smplx_file = os.path.join(OUT_FOLDER, 'smplx.zip')
    os.makedirs(save_path_smplx, exist_ok=True)
    _run(['zip', '-r', save_mesh_file, save_path_mesh])
    _run(['zip', '-r', save_smplx_file, save_path_smplx])

    yield img, result_video, save_mesh_file, save_smplx_file
|
67 |
+
|
68 |
+
# Static HTML/Markdown snippets rendered at the top of the demo page.

# Page heading.
TITLE = '''<h1 align="center">SMPLer-X: Scaling Up Expressive Human Pose and Shape Estimation</h1>'''

# Embedded YouTube player. A space now separates the `src` and `title`
# attributes, which were previously fused together (`...rON7E"title=`).
VIDEO = '''
<center><iframe width="960" height="540"
src="https://www.youtube.com/embed/DepTqbPpVzY?si=qSeQuX-bgm_rON7E" title="SMPLer-X: Scaling Up Expressive Human Pose and Shape Estimation" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen>
</iframe>
</center><br>'''

# Short usage note shown under the video.
DESCRIPTION = '''
<b>Official Gradio demo</b> for <a href="https://caizhongang.com/projects/SMPLer-X/"><b>SMPLer-X: Scaling Up Expressive Human Pose and Shape Estimation</b></a>.<br>
<p>
Note: You can drop a video at the panel (or select one of the examples)
to obtain the 3D parametric reconstructions of the detected humans.
</p>
'''
|
81 |
+
|
82 |
+
# Gradio UI: inputs and options on top, streamed results and downloads below.
# NOTE(review): the nesting of the Row/Column contexts was reconstructed from
# a view that had lost its indentation; confirm the layout against the
# running app.
with gr.Blocks(title="SMPLer-X", css=".gradio-container") as demo:

    gr.Markdown(TITLE)
    gr.HTML(VIDEO)
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column():
            video_input = gr.Video(label="Input video", elem_classes="video")
            threshold = gr.Slider(0, 1.0, value=0.5, label='BBox detection threshold')
        with gr.Column(scale=2):
            num_people = gr.Radio(
                choices=["Single person", "Multiple people"],
                value="Single person",
                label="Number of people",
                info="Choose how many people are there in the video. Choose 'single person' for faster inference.",
                interactive=True,
                scale=1,)
            gr.HTML("""<br/>""")
            # Despite its name, this checkbox is passed to `infer` as
            # `render_mesh` (checked means render meshes).
            mesh_as_vertices = gr.Checkbox(
                label="Render as mesh",
                info="By default, the estimated SMPL-X parameters are rendered as vertices for faster visualization. Check this option if you want to visualize meshes instead.",
                interactive=True,
                scale=1,)

            send_button = gr.Button("Infer")
    gr.HTML("""<br/>""")

    with gr.Row():
        with gr.Column():
            processed_frames = gr.Image(label="Last processed frame")
            video_output = gr.Video(elem_classes="video")
        with gr.Column():
            meshes_output = gr.File(label="3D meshes")
            smplx_output = gr.File(label= "SMPL-X models")

    # `infer` is a generator: each yield updates these four outputs in order.
    send_button.click(fn=infer, inputs=[video_input, threshold, num_people, mesh_as_vertices], outputs=[processed_frames, video_output, meshes_output, smplx_output])

    # Every example row holds a single value (the video path), so the only
    # matching input is `video_input`. The stray `0.5` that used to be listed
    # here is not a component and has been removed.
    example_videos = gr.Examples([
        ['/home/user/app/assets/01.mp4'],
        ['/home/user/app/assets/02.mp4'],
        ['/home/user/app/assets/03.mp4'],
        ['/home/user/app/assets/04.mp4'],
        ['/home/user/app/assets/05.mp4'],
        ['/home/user/app/assets/06.mp4'],
        ['/home/user/app/assets/07.mp4'],
        ['/home/user/app/assets/08.mp4'],
        ['/home/user/app/assets/09.mp4'],
        ],
        inputs=[video_input])

# queue() is required so that the generator outputs of `infer` can stream.
demo.queue().launch(debug=True)
|