Spaces:
Runtime error
Runtime error
# Hugging Face Space: 2D to 3D Stereo Pair Generator using Depth + LaMa Inpainting
import io
import os
import subprocess
import tempfile

import cv2
import gradio as gr
import numpy as np
import requests
import torch
from PIL import Image
from transformers import DPTForDepthEstimation, DPTFeatureExtractor
# === DEVICE ===
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# === DEPTH MODEL ===
def load_depth_model():
    """Load the DPT depth network and its matching preprocessor.

    Both are created from the ``Intel/dpt-hybrid-midas`` checkpoint; the
    network is moved to the module-level ``device`` before being returned.

    Returns:
        A ``(model, processor)`` tuple.
    """
    checkpoint = "Intel/dpt-hybrid-midas"
    depth_net = DPTForDepthEstimation.from_pretrained(checkpoint)
    depth_net = depth_net.to(device)
    preprocessor = DPTFeatureExtractor.from_pretrained(checkpoint)
    return depth_net, preprocessor
def estimate_depth(image: Image.Image, model, processor):
    """Predict a normalised depth map at the resolution of ``image``.

    The image is shrunk to 384x384 for the network, and the prediction is
    then resampled back to the *original* image size so that the result can
    be indexed pixel-for-pixel against the source image.

    Args:
        image: Source picture as a PIL image.
        model: Callable depth network whose output exposes
            ``predicted_depth``.
        processor: Preprocessor that turns a PIL image into model inputs.

    Returns:
        A 2-D NumPy array of shape ``(height, width)`` scaled to ``[0, 1]``.
        A prediction with no usable range (constant or non-finite) yields
        an all-zero map instead of dividing by zero.
    """
    # PIL reports (width, height) while interpolate() expects (height, width).
    # Capture this BEFORE resizing so the output matches the source image.
    target_hw = image.size[::-1]

    network_input = image.resize((384, 384))
    inputs = processor(images=network_input, return_tensors="pt").to(device)

    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        predicted = model(**inputs).predicted_depth

    resampled = torch.nn.functional.interpolate(
        predicted.unsqueeze(1),
        size=target_hw,
        mode="bicubic",
        align_corners=False,
    )
    # Index batch/channel explicitly; a bare squeeze() would also drop a
    # height or width of 1 and return a 1-D array.
    depth = resampled[0, 0].detach().cpu().numpy()

    depth_min, depth_max = depth.min(), depth.max()
    depth_range = depth_max - depth_min
    # "not >" (rather than "<=") also catches a NaN range.
    if not depth_range > 0:
        return np.zeros_like(depth)
    return (depth - depth_min) / depth_range
def depth_to_disparity(depth, max_disp=32):
    """Map normalised depth to a pixel disparity.

    A depth of 0 maps to ``max_disp`` and a depth of 1 maps to 0; values in
    between scale linearly.

    Args:
        depth: Scalar or array of normalised depth values.
        max_disp: Disparity assigned to a depth of 0.

    Returns:
        Disparity with the same shape as ``depth``.
    """
    # NOTE(review): whether larger depth values mean "near" or "far" depends
    # on the depth model's output convention -- confirm the inversion here
    # gives nearer pixels the larger shift.
    inverted = 1.0 - depth
    return inverted * max_disp
def generate_right_and_mask(image, disparity):
    """Forward-warp ``image`` horizontally and report the holes left behind.

    Every source pixel ``(y, x)`` is copied to ``(y, x - d)`` where ``d`` is
    its disparity rounded to the nearest integer (ties to even). Pixels that
    would land outside the image are dropped. When several source pixels
    land on the same target, the one with the largest source ``x`` wins.

    Args:
        image: Array of shape ``(h, w)`` or ``(h, w, channels)``.
        disparity: Array whose first two dimensions are at least ``(h, w)``;
            only the top-left ``h x w`` region is read.

    Returns:
        ``(right, mask)``: ``right`` has the shape and dtype of ``image``
        with unfilled pixels left at zero; ``mask`` is ``uint8`` of shape
        ``(h, w)`` with 1 marking unfilled pixels and 0 marking filled ones.

    Raises:
        IndexError: If ``disparity`` is smaller than the image.
    """
    h, w = image.shape[:2]
    right = np.zeros_like(image)
    mask = np.ones((h, w), dtype=np.uint8)

    disp = np.asarray(disparity)
    if disp.ndim < 2 or disp.shape[0] < h or disp.shape[1] < w:
        raise IndexError("disparity map is smaller than the image")

    # Round once for the whole map instead of once per pixel.
    shifts = np.rint(disp[:h, :w]).astype(np.intp)
    rows = np.arange(h)

    # One vectorised step per source column. Within a column each row writes
    # to a distinct target, so there are no conflicting writes inside a step,
    # and walking columns left-to-right keeps the "largest x wins" ordering.
    for x in range(w):
        targets = x - shifts[:, x]
        in_bounds = (targets >= 0) & (targets < w)
        ys = rows[in_bounds]
        xs = targets[in_bounds]
        right[ys, xs] = image[ys, x]
        mask[ys, xs] = 0
    return right, mask
# === LAMA INPAINTING ===
# NOTE(review): this is the Space's web-page URL. Confirm that
# `<LAMA_API>/run/predict` really accepts multipart uploads -- Gradio-hosted
# Spaces usually serve their API from a `*.hf.space` host with a JSON body.
LAMA_API = "https://huggingface.co/spaces/saic-mdal/lama-inpainting"


def run_lama_inpainting(image_bgr, mask, *, timeout=120):
    """Fill the masked region of ``image_bgr`` via the remote LaMa service.

    The image and mask are written as PNGs to a temporary directory, posted
    to ``{LAMA_API}/run/predict``, and the image referenced by the response
    is downloaded and returned.

    Args:
        image_bgr: Image array in OpenCV BGR channel order.
        mask: Array multiplied by 255 before upload, so it is expected to
            hold 0/1 values (presumably ``uint8``; verify against caller).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The inpainted image as a BGR NumPy array.

    Raises:
        RuntimeError: If the inpainting request does not return HTTP 200.
        requests.RequestException: On network failure, timeout, or a failed
            download of the result image.
    """
    img = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
    mask_img = Image.fromarray(mask * 255).convert("RGB")

    # The context managers guarantee that the file handles are closed and
    # the scratch directory is removed, even when the request fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        img_path = os.path.join(tmp_dir, "input.png")
        mask_path = os.path.join(tmp_dir, "mask.png")
        img.save(img_path)
        mask_img.save(mask_path)

        with open(img_path, "rb") as img_file, open(mask_path, "rb") as mask_file:
            response = requests.post(
                f"{LAMA_API}/run/predict",
                files={"image": img_file, "mask": mask_file},
                timeout=timeout,
            )

    if response.status_code != 200:
        raise RuntimeError(
            f"LAMA inpainting failed (HTTP {response.status_code})"
        )

    # NOTE(review): assumes data[0]["name"] is a fetchable URL -- confirm
    # against the service's actual response schema.
    result_url = response.json()["data"][0]["name"]
    download = requests.get(result_url, timeout=timeout)
    download.raise_for_status()

    # Decode from the fully-read body rather than the raw socket stream, and
    # force three channels so the RGB->BGR conversion below cannot fail on
    # RGBA or palette images.
    result = Image.open(io.BytesIO(download.content)).convert("RGB")
    return cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR)
# === APP LOGIC ===
# Loaded once at import time so every request reuses the same model.
depth_model, depth_processor = load_depth_model()


def stereo_pipeline(image_pil):
    """Turn a single picture into a left/right stereo pair.

    Steps: estimate depth, convert it to per-pixel disparity, warp the image
    to synthesise the right-eye view, then inpaint the holes the warp leaves.

    Args:
        image_pil: The uploaded picture as a PIL image, or ``None`` when the
            user submitted without choosing one.

    Returns:
        ``(left, right)`` where ``left`` is the unmodified input and
        ``right`` is the generated right-eye view as an RGB PIL image.

    Raises:
        gr.Error: If no image was provided.
    """
    if image_pil is None:
        # Shown to the user in the UI instead of an opaque AttributeError.
        raise gr.Error("Please upload an image first.")

    image = image_pil.convert("RGB")
    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    depth = estimate_depth(image, depth_model, depth_processor)

    # The warp indexes disparity per image pixel, so the depth map must have
    # exactly the image's height and width; resample it if it does not.
    height, width = image_cv.shape[:2]
    if depth.shape[:2] != (height, width):
        depth = cv2.resize(depth, (width, height), interpolation=cv2.INTER_LINEAR)

    disparity = depth_to_disparity(depth)
    right_img, mask = generate_right_and_mask(image_cv, disparity)
    right_filled = run_lama_inpainting(right_img, mask)

    left = image_pil
    right = Image.fromarray(cv2.cvtColor(right_filled, cv2.COLOR_BGR2RGB))
    return left, right
# === GRADIO UI ===
# One image goes in; the original (left eye) and the synthesised right-eye
# view come out.
_upload_box = gr.Image(type="pil", label="Upload 2D Image")
_stereo_views = [
    gr.Image(label="Left Eye (Original)"),
    gr.Image(label="Right Eye (AI Generated)"),
]

demo = gr.Interface(
    fn=stereo_pipeline,
    inputs=_upload_box,
    outputs=_stereo_views,
    title="2D to 3D Stereo Generator with LaMa Inpainting",
    description="Generates a stereo pair from a 2D image using depth estimation and LaMa AI inpainting to handle occluded pixels in the right-eye view.",
)

# Start the web server for the app.
demo.launch()