|
import os |
|
from PIL import Image |
|
|
|
import gradio as gr |
|
import numpy as np |
|
import torch |
|
from transformers import AutoModelForDepthEstimation, DPTImageProcessor |
|
|
|
|
|
# Hugging Face checkpoint shared by the image processor and the depth model,
# so the two can never drift apart.
_CHECKPOINT = "Intel/dpt-large"

# Loaded once at import time and reused by every request.
processor = DPTImageProcessor.from_pretrained(_CHECKPOINT)
model = AutoModelForDepthEstimation.from_pretrained(_CHECKPOINT)
|
|
|
|
|
def main(image, input_size=384):
    """Estimate a depth map for ``image`` and return it as an 8-bit image.

    The picture is preprocessed with the module-level ``processor``, run
    through the module-level ``model`` without gradient tracking, and the
    predicted depth is resampled (bicubic) back to the input's spatial size.
    The result is scaled so that the largest depth value maps to 255.

    Args:
        image: Input picture. Must expose ``.shape`` whose last axis is
            dropped to obtain the output size -- assumed to be an
            ``(height, width, channels)`` array as delivered by the Gradio
            image input; TODO confirm for other callers.
        input_size: Edge length passed to the processor as the resize target
            (used for both height and width, with ``keep_aspect_ratio=True``).

    Returns:
        PIL.Image.Image: Depth map built from a 2-D ``uint8`` array.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        raise ValueError("No input image was provided.")

    # The value may arrive as a float from a UI control; pass whole pixels.
    edge = int(input_size)
    inputs = processor(
        images=image,
        return_tensors="pt",
        do_resize=True,
        size=(edge, edge),
        keep_aspect_ratio=True,
    )
    print(type(inputs), inputs.data["pixel_values"].shape)

    with torch.no_grad():
        predicted_depth = model(**inputs).predicted_depth

    # Add a channel axis for interpolate(), then resample to the input size.
    resized = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.shape[:-1],
        mode="bicubic",
    )
    # Index rather than squeeze(): squeeze() would also drop a spatial axis
    # of length 1 and hand Image.fromarray() a 1-D array.
    output = resized[0, 0].cpu().numpy()

    peak = output.max()
    if peak > 0:
        # Bicubic resampling can overshoot below zero; clamp before the
        # uint8 cast so those pixels become black instead of garbage.
        scaled = np.clip(output * 255 / peak, 0, 255)
    else:
        # All-zero (or non-positive/NaN-peak) prediction: avoid dividing by
        # zero and return a black map.
        scaled = np.zeros_like(output)

    return Image.fromarray(scaled.astype("uint8"))
|
|
|
|
|
# Text shown at the top of the Gradio page.
title = "Demo: monocular depth estimation with DPT"
description = "This demo uses <a href='https://huggingface.co/Intel/dpt-large' target='_blank'>DPT</a> to estimate depth from monocular image."

# One example row per non-hidden entry in ./examples (relative to the
# working directory). os.listdir() returns names in arbitrary order, so the
# listing is sorted to keep the example order stable between runs.
examples = [
    [f"examples/{name}"]
    for name in sorted(os.listdir("examples"))
    if not name.startswith(".")
]

# Each example supplies only the image; main() then falls back to its
# default input_size.
demo = gr.Interface(
    fn=main,
    inputs=[
        gr.Image(label="Input Image"),
        gr.Slider(128, 512, value=384, label="Input Size"),
    ],
    outputs="image",
    title=title,
    description=description,
    examples=examples,
    cache_examples=True,
)

demo.launch(debug=True, share=False)
|
|