# sapiens-demo / app.py
# Last commit: "update sapiens link" (28eb7cb, verified) by joselobenitezg
import os
import tempfile

import gradio as gr
import numpy as np
from PIL import Image
import cv2
import spaces

from inference.seg import process_image_or_video as process_seg
from inference.pose import process_image_or_video as process_pose
from inference.depth import process_image_or_video as process_depth
from inference.normal import process_image_or_video as process_normal
from config import SAPIENS_LITE_MODELS_PATH
def update_model_choices(task):
    """Build the model-version dropdown that matches the selected task.

    Args:
        task: Task name; it is lower-cased and used as a key into
            ``SAPIENS_LITE_MODELS_PATH``.

    Returns:
        A ``gr.Dropdown`` whose choices are the model versions configured for
        the task. The first version is pre-selected, or nothing is selected
        when the task has no versions.

    Raises:
        KeyError: If the lower-cased task is not a key of
            ``SAPIENS_LITE_MODELS_PATH``.
    """
    versions = [*SAPIENS_LITE_MODELS_PATH[task.lower()].keys()]
    # next() with a default yields None for an empty list instead of raising.
    preselected = next(iter(versions), None)
    return gr.Dropdown(choices=versions, value=preselected)
@spaces.GPU(duration=75)
def process_image(input_image, task, version):
    """Run the selected task on a single image.

    Args:
        input_image: Image to process. A NumPy array is converted to a PIL
            image first; any other value is forwarded unchanged.
        task: Task name, matched case-insensitively against ``seg``,
            ``pose``, ``depth`` and ``normal``.
        version: Model version identifier, forwarded to the inference
            function.

    Returns:
        Whatever the selected inference function returns, or ``None`` when the
        task is not one of the supported names.
    """
    if isinstance(input_image, np.ndarray):
        input_image = Image.fromarray(input_image)

    task_key = task.lower()
    handlers = {
        'seg': process_seg,
        'pose': process_pose,
        'depth': process_depth,
        'normal': process_normal,
    }
    handler = handlers.get(task_key)
    if handler is None:
        # The message is intentionally left as-is (Spanish: "Unsupported task").
        print(f"Tarea no soportada: {task}")
        return None

    return handler(input_image, task=task_key, version=version)
@spaces.GPU(duration=75)
def process_video(input_video, task, version):
    """Run the selected task on every frame of a video and save the result.

    Each frame is read with OpenCV, converted from BGR to RGB, passed to the
    inference function for the task, converted back to BGR and appended to an
    MP4 file (``mp4v`` codec) that keeps the input's frame rate and size.

    Args:
        input_video: Path of the video file to read.
        task: Task name, matched case-insensitively against ``seg``,
            ``pose``, ``depth`` and ``normal``.
        version: Model version identifier, forwarded to the inference
            function.

    Returns:
        Path of the written MP4 file, or ``None`` when the task is not one of
        the supported names (same convention as ``process_image``).

    Raises:
        gr.Error: If no video was provided or the video cannot be opened.
    """
    # Resolve the task once, before touching any file: it does not change
    # from frame to frame.
    task_key = task.lower()
    handlers = {
        'seg': process_seg,
        'pose': process_pose,
        'depth': process_depth,
        'normal': process_normal,
    }
    process_frame = handlers.get(task_key)
    if process_frame is None:
        # The message is intentionally left as-is (Spanish: "Unsupported task").
        print(f"Tarea no soportada: {task}")
        return None

    if not input_video:
        raise gr.Error("Please provide an input video.")

    cap = cv2.VideoCapture(input_video)
    writer = None
    try:
        if not cap.isOpened():
            raise gr.Error("Could not open the input video.")

        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # A unique file per call, so that simultaneous requests cannot
        # overwrite each other's output. Only the name is needed here; OpenCV
        # reopens the path itself.
        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp:
            output_path = tmp.name

        writer = cv2.VideoWriter(
            output_path,
            cv2.VideoWriter_fourcc(*'mp4v'),
            fps,
            (width, height),
        )

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed_frame = process_frame(frame_rgb, task=task_key, version=version)
            if processed_frame is None:
                continue

            processed_frame_bgr = cv2.cvtColor(np.array(processed_frame), cv2.COLOR_RGB2BGR)
            # OpenCV requires written frames to have the size the writer was
            # opened with, so bring any differently sized result back to it.
            if processed_frame_bgr.shape[:2] != (height, width):
                processed_frame_bgr = cv2.resize(processed_frame_bgr, (width, height))
            writer.write(processed_frame_bgr)
    finally:
        # Release the handles even when a frame fails, so the files are not
        # left open.
        cap.release()
        if writer is not None:
            writer.release()

    return output_path
# Task names offered in both tabs. Each one is used as a key into
# SAPIENS_LITE_MODELS_PATH and is dispatched on by process_image/process_video.
_TASKS = ("seg", "pose", "depth", "normal")
_DEFAULT_TASK = "seg"
_PREFERRED_MODEL = "sapiens_0.3b"

# Page header shown above the tabs.
_HEADER_HTML = """
<div style="text-align: center; font-size: 35px; font-weight: bold; margin-bottom: 20px;">
Sapiens Huggingface Space🤗
</div>
<div style="text-align: center; font-size: 25px; font-weight: bold; margin-bottom: 20px;">
Foundation for Human Vision Models
</div>
<div style="text-align: center;">
<a href="https://huggingface.co/facebook/sapiens">🤗 Sapiens Models</a> |
<a href="https://github.com/facebookresearch/sapiens/">🌐 Github</a> |
<a href="https://www.arxiv.org/abs/2408.12569">📜 arxiv </a> |
<a href="https://joselo.ai">🔗Personal Blog </a>
</div>
<div style="text-align: center; font-size: 15px; font-weight: bold; margin-bottom: 20px;">
Sapiens, a family of models for four fundamental human-centric vision tasks - 2D pose estimation, body-part segmentation, depth estimation, and surface normal prediction.
</div>
"""


def _build_task_controls():
    """Create the task selector and model-version dropdown for one tab.

    Both tabs use identical controls, so they are built in one place. Call
    this inside the layout context the controls should appear in.

    Returns:
        Tuple ``(task_radio, model_dropdown)``.
    """
    versions = list(SAPIENS_LITE_MODELS_PATH[_DEFAULT_TASK].keys())
    # Keep the established default when the configuration offers it;
    # otherwise fall back to the first configured version so the initial
    # value is always one of the choices.
    if _PREFERRED_MODEL in versions:
        initial_version = _PREFERRED_MODEL
    else:
        initial_version = versions[0] if versions else None

    task_radio = gr.Radio(
        list(_TASKS),
        label="Task",
        info="Choose the task to perform",
        value=_DEFAULT_TASK,
    )
    model_dropdown = gr.Dropdown(
        label="Model Version",
        choices=versions,
        value=initial_version,
    )
    return task_radio, model_dropdown


# NOTE(review): the nesting of this layout was reconstructed from statement
# order. The Run buttons are placed in the result column of each tab; confirm
# against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown(_HEADER_HTML)

    with gr.Tabs():
        with gr.TabItem('Image'):
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(label="Input Image", type="pil")
                    select_task_image, model_name_image = _build_task_controls()
                with gr.Column():
                    result_image = gr.Image(label="Result")
                    run_button_image = gr.Button("Run")

        with gr.TabItem('Video'):
            with gr.Row():
                with gr.Column():
                    input_video = gr.Video(label="Input Video")
                    select_task_video, model_name_video = _build_task_controls()
                with gr.Column():
                    result_video = gr.Video(label="Result")
                    run_button_video = gr.Button("Run")

    # Changing the task refreshes the model list of the same tab.
    select_task_image.change(fn=update_model_choices, inputs=select_task_image, outputs=model_name_image)
    select_task_video.change(fn=update_model_choices, inputs=select_task_video, outputs=model_name_video)

    run_button_image.click(
        fn=process_image,
        inputs=[input_image, select_task_image, model_name_image],
        outputs=[result_image],
    )
    run_button_video.click(
        fn=process_video,
        inputs=[input_video, select_task_video, model_name_video],
        outputs=[result_video],
    )

if __name__ == "__main__":
    demo.launch(share=False)