import gradio as gr
import numpy as np
from PIL import Image
import cv2
import spaces

from inference.seg import process_image_or_video as process_seg
from inference.pose import process_image_or_video as process_pose
from inference.depth import process_image_or_video as process_depth
from inference.normal import process_image_or_video as process_normal
from config import SAPIENS_LITE_MODELS_PATH

# Single dispatch table shared by the image and video paths, so adding a
# task only requires a new entry here.
TASK_PROCESSORS = {
    "seg": process_seg,
    "pose": process_pose,
    "depth": process_depth,
    "normal": process_normal,
}


def update_model_choices(task):
    """Repopulate the model dropdown whenever the selected task changes."""
    model_choices = list(SAPIENS_LITE_MODELS_PATH[task.lower()].keys())
    return gr.Dropdown(choices=model_choices, value=model_choices[0] if model_choices else None)


@spaces.GPU(duration=75)
def process_image(input_image, task, version):
    # Gradio may hand over a NumPy array; the inference code expects a PIL image.
    if isinstance(input_image, np.ndarray):
        input_image = Image.fromarray(input_image)

    processor = TASK_PROCESSORS.get(task.lower())
    if processor is None:
        print(f"Unsupported task: {task}")
        return None
    return processor(input_image, task=task.lower(), version=version)


@spaces.GPU(duration=75)
def process_video(input_video, task, version):
    cap = cv2.VideoCapture(input_video)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    output_video = cv2.VideoWriter(
        "output_video.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
    )

    processor = TASK_PROCESSORS.get(task.lower())
    if processor is None:
        print(f"Unsupported task: {task}")
    else:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV decodes frames as BGR; the inference code works in RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed_frame = processor(frame_rgb, task=task.lower(), version=version)
            if processed_frame is not None:
                processed_frame_bgr = cv2.cvtColor(np.array(processed_frame), cv2.COLOR_RGB2BGR)
                output_video.write(processed_frame_bgr)

    cap.release()
    output_video.release()
    return "output_video.mp4"


with gr.Blocks() as demo:
    gr.Markdown("""
# Sapiens Hugging Face Space 🤗
## Foundation for Human Vision Models
🤗 Sapiens Models | 🌐 GitHub | 📜 arXiv | 🔗 Personal Blog

Sapiens is a family of models for four fundamental human-centric vision tasks: 2D pose estimation, body-part segmentation, depth estimation, and surface normal prediction.
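Upload an image or a video, choose a task and a model version, then press **Run**; the rendered result appears in the panel on the right.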
""") with gr.Tabs(): with gr.TabItem('Image'): with gr.Row(): with gr.Column(): input_image = gr.Image(label="Input Image", type="pil") select_task_image = gr.Radio( ["seg", "pose", "depth", "normal"], label="Task", info="Choose the task to perform", value="seg" ) model_name_image = gr.Dropdown( label="Model Version", choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()), value="sapiens_0.3b", ) with gr.Column(): result_image = gr.Image(label="Result") run_button_image = gr.Button("Run") with gr.TabItem('Video'): with gr.Row(): with gr.Column(): input_video = gr.Video(label="Input Video") select_task_video = gr.Radio( ["seg", "pose", "depth", "normal"], label="Task", info="Choose the task to perform", value="seg" ) model_name_video = gr.Dropdown( label="Model Version", choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()), value="sapiens_0.3b", ) with gr.Column(): result_video = gr.Video(label="Result") run_button_video = gr.Button("Run") select_task_image.change(fn=update_model_choices, inputs=select_task_image, outputs=model_name_image) select_task_video.change(fn=update_model_choices, inputs=select_task_video, outputs=model_name_video) run_button_image.click( fn=process_image, inputs=[input_image, select_task_image, model_name_image], outputs=[result_image], ) run_button_video.click( fn=process_video, inputs=[input_video, select_task_video, model_name_video], outputs=[result_video], ) if __name__ == "__main__": demo.launch(share=False)