Spaces:
Runtime error
Runtime error
File size: 3,589 Bytes
a772610 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# Hugging Face Space: 2D to 3D Stereo Pair Generator using Depth + LaMa Inpainting
import gradio as gr
import torch
import numpy as np
import cv2
from PIL import Image
from transformers import DPTForDepthEstimation, DPTFeatureExtractor
import requests
import tempfile
import subprocess
import os
# === DEVICE ===
# Run on the GPU when torch reports CUDA as available; otherwise fall back to
# the CPU. The depth model and its inputs are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# === DEPTH MODEL ===
def load_depth_model():
    """Load the DPT depth-estimation model and its matching preprocessor.

    Both are fetched from the ``Intel/dpt-hybrid-midas`` checkpoint; the model
    is moved to the module-level ``device``.

    Returns:
        A ``(model, processor)`` tuple.
    """
    checkpoint = "Intel/dpt-hybrid-midas"
    depth_net = DPTForDepthEstimation.from_pretrained(checkpoint)
    depth_net = depth_net.to(device)
    feature_extractor = DPTFeatureExtractor.from_pretrained(checkpoint)
    return depth_net, feature_extractor
@torch.no_grad()
def estimate_depth(image: Image.Image, model, processor):
    """Predict a depth map for ``image``, normalized to the range [0, 1].

    The image is resized to 384x384 for the model, and the prediction is then
    resampled back to the *original* image resolution so the result can be
    indexed with the caller's pixel coordinates.

    Args:
        image: Input PIL image.
        model: Depth model whose output exposes ``predicted_depth``.
        processor: Preprocessor called as ``processor(images=..., return_tensors="pt")``.

    Returns:
        A 2-D NumPy array of shape ``(height, width)`` matching ``image``,
        min-max normalized to [0, 1]. A constant (or non-finite range)
        prediction yields an all-zero map instead of dividing by zero.
    """
    # PIL reports size as (width, height); remember it before resizing so the
    # depth map can be brought back to the caller's resolution.
    original_size = image.size
    model_input = image.resize((384, 384))
    inputs = processor(images=model_input, return_tensors="pt").to(device)
    predicted = model(**inputs).predicted_depth
    resampled = torch.nn.functional.interpolate(
        predicted.unsqueeze(1),
        size=original_size[::-1],  # interpolate expects (height, width)
        mode="bicubic",
        align_corners=False,
    )
    # Index the batch/channel axes explicitly: squeeze() would also drop a
    # spatial axis of length 1.
    depth = resampled[0, 0].detach().cpu().numpy()
    depth_min, depth_max = depth.min(), depth.max()
    depth_range = depth_max - depth_min
    # `not (> 0)` also catches a NaN range.
    if not depth_range > 0:
        return np.zeros_like(depth)
    return (depth - depth_min) / depth_range
def depth_to_disparity(depth, max_disp=32):
    """Map depth values to disparity by inverting them and scaling.

    Args:
        depth: Scalar or array of depth values.
        max_disp: Disparity assigned to a depth of 0; a depth of 1 maps to 0.

    Returns:
        ``(1.0 - depth) * max_disp``, with the same shape as ``depth``.
    """
    inverted = 1.0 - depth
    disparity = inverted * max_disp
    return disparity
def generate_right_and_mask(image, disparity):
    """Warp ``image`` horizontally by ``disparity`` and report unfilled pixels.

    Every source pixel ``(y, x)`` is copied to ``(y, x - round(disparity[y, x]))``
    when that column lies inside the image. When several source pixels land on
    the same target, the one with the largest source ``x`` wins.

    Args:
        image: Array of shape ``(h, w)`` or ``(h, w, channels)``.
        disparity: 2-D array covering at least ``(h, w)``; only the top-left
            ``(h, w)`` region is read. Non-finite entries are treated as
            "no mapping" and leave a hole.

    Returns:
        ``(right, mask)``: ``right`` has the shape and dtype of ``image`` with
        unfilled pixels left at zero; ``mask`` is a ``uint8`` ``(h, w)`` array
        holding 1 where no pixel was written and 0 where one was.

    Raises:
        IndexError: If ``disparity`` is not 2-D or is smaller than the image.
    """
    h, w = image.shape[:2]
    disparity = np.asarray(disparity)
    if disparity.ndim != 2 or disparity.shape[0] < h or disparity.shape[1] < w:
        raise IndexError(
            f"disparity shape {disparity.shape} does not cover image size {(h, w)}"
        )
    disparity = disparity[:h, :w]

    right = np.zeros_like(image)
    mask = np.ones((h, w), dtype=np.uint8)

    # Round half-to-even (same as round() on NumPy floats); non-finite values
    # get a placeholder shift and are excluded through `finite` below.
    finite = np.isfinite(disparity)
    shifts = np.rint(np.where(finite, disparity, 0)).astype(np.int64)

    rows = np.arange(h)
    # Walk the columns left to right so later columns overwrite earlier ones.
    # Within one column each row appears once, so the vectorised assignment
    # never contains duplicate targets.
    for x in range(w):
        targets = x - shifts[:, x]
        valid = finite[:, x] & (targets >= 0) & (targets < w)
        ys = rows[valid]
        xs = targets[valid]
        right[ys, xs] = image[ys, x]
        mask[ys, xs] = 0
    return right, mask
# === LAMA INPAINTING ===
# Base URL of the remote LaMa inpainting service; "/run/predict" is appended
# to it when a request is sent.
# NOTE(review): this looks like the Space's web page URL rather than an API
# host; confirm that "/run/predict" is actually served under it.
LAMA_API = "https://huggingface.co/spaces/saic-mdal/lama-inpainting"
def run_lama_inpainting(image_bgr, mask, timeout=120):
    """Fill the masked regions of ``image_bgr`` using the remote LaMa service.

    The image and mask are written as PNG files to a temporary directory,
    uploaded to ``{LAMA_API}/run/predict``, and the image referenced by the
    JSON response is downloaded and returned.

    Args:
        image_bgr: Image array in BGR channel order.
        mask: Array with 1 where pixels should be inpainted and 0 elsewhere;
            it is scaled by 255 before upload.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The inpainted image as a BGR array.

    Raises:
        RuntimeError: If the prediction request does not return HTTP 200.
        requests.HTTPError: If downloading the result image fails.
    """
    img = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
    mask_img = Image.fromarray(mask * 255).convert("RGB")

    # The context managers close both upload handles and delete the scratch
    # directory even when the request raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        img_path = os.path.join(tmp_dir, "input.png")
        mask_path = os.path.join(tmp_dir, "mask.png")
        img.save(img_path)
        mask_img.save(mask_path)
        with open(img_path, "rb") as img_file, open(mask_path, "rb") as mask_file:
            response = requests.post(
                f"{LAMA_API}/run/predict",
                files={"image": img_file, "mask": mask_file},
                timeout=timeout,
            )

    if response.status_code != 200:
        raise RuntimeError(
            f"LAMA inpainting failed (HTTP {response.status_code})"
        )

    result_url = response.json()["data"][0]["name"]
    with requests.get(result_url, stream=True, timeout=timeout) as download:
        download.raise_for_status()
        # convert() forces the image to be fully read before the connection
        # closes, and guarantees three channels for the RGB->BGR conversion.
        result = Image.open(download.raw).convert("RGB")
    return cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR)
# === APP LOGIC ===
# Load the depth model and processor once at import time; stereo_pipeline
# reuses these module-level objects on every call.
depth_model, depth_processor = load_depth_model()
def stereo_pipeline(image_pil):
    """Build a stereo pair (left, right) from a single uploaded image.

    Steps: estimate depth, convert it to disparity, warp the image into a
    right-eye view, then inpaint the holes left by the warp.

    Args:
        image_pil: The uploaded PIL image, or ``None`` when nothing was
            uploaded.

    Returns:
        ``(left, right)``: ``left`` is the uploaded image unchanged; ``right``
        is the generated right-eye view as an RGB PIL image.

    Raises:
        gr.Error: If no image was provided.
    """
    # Gradio passes None when the form is submitted without an image; surface
    # a readable message instead of an AttributeError.
    if image_pil is None:
        raise gr.Error("Please upload an image first.")

    image = image_pil.convert("RGB")
    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    depth = estimate_depth(image, depth_model, depth_processor)
    disparity = depth_to_disparity(depth)
    right_img, mask = generate_right_and_mask(image_cv, disparity)
    right_filled = run_lama_inpainting(right_img, mask)
    left = image_pil
    right = Image.fromarray(cv2.cvtColor(right_filled, cv2.COLOR_BGR2RGB))
    return left, right
# === GRADIO UI ===
# One image input, two image outputs (left eye / right eye), wired to
# stereo_pipeline; the app is launched as soon as this module runs.
_input_image = gr.Image(type="pil", label="Upload 2D Image")
_output_images = [
    gr.Image(label="Left Eye (Original)"),
    gr.Image(label="Right Eye (AI Generated)"),
]
demo = gr.Interface(
    fn=stereo_pipeline,
    inputs=_input_image,
    outputs=_output_images,
    title="2D to 3D Stereo Generator with LaMa Inpainting",
    description=(
        "Generates a stereo pair from a 2D image using depth estimation and "
        "LaMa AI inpainting to handle occluded pixels in the right-eye view."
    ),
)
demo.launch()
|