"""Gradio demo: monocular depth estimation with the Intel/dpt-large model."""

import os

import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import AutoModelForDepthEstimation, DPTImageProcessor

# Loaded once at module level so every request reuses the same processor
# and model instances instead of reloading them per call.
processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
model = AutoModelForDepthEstimation.from_pretrained("Intel/dpt-large")


def main(image, input_size=384):
    """Estimate a depth map for ``image`` and return it as a grayscale image.

    Args:
        image: Input picture as an array exposing ``.shape``; its first two
            axes are used as the output height and width.
            NOTE(review): assumed to be the (H, W, C) NumPy array that
            ``gr.Image`` delivers by default -- confirm if the component
            type is ever changed.
        input_size: Side length (pixels) requested from the processor when
            resizing before inference. Cast to ``int`` because the UI
            slider may deliver a float.

    Returns:
        A ``PIL.Image.Image`` built from a 2-D ``uint8`` array in which the
        largest predicted value maps to 255.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Without this guard the ``image.shape`` access below fails with an
        # opaque AttributeError; gr.Error is surfaced to the user instead.
        raise gr.Error("Please provide an input image.")

    side = int(input_size)

    # prepare image for the model
    inputs = processor(
        images=image,
        return_tensors="pt",
        do_resize=True,
        size=(side, side),
        keep_aspect_ratio=True,
    )
    print(type(inputs), inputs.data["pixel_values"].shape)

    # do inference
    with torch.no_grad():
        outputs = model(**inputs)
        predicted_depth = outputs.predicted_depth

    # interpolate to original size; [0, 0] drops exactly the batch and
    # channel axes, whereas a bare squeeze() would also drop a spatial axis
    # of length 1 and break the 2-D image conversion below.
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.shape[:2],
        mode="bicubic",
        align_corners=False,
    )[0, 0]

    # Bicubic resampling can overshoot below zero; negative floats would
    # wrap around when cast to uint8, so clamp them first.
    output = np.clip(prediction.cpu().numpy(), 0, None)

    peak = output.max()
    if peak > 0:
        formatted = (output * 255 / peak).astype("uint8")
    else:
        # Flat (or invalid) prediction: avoid dividing by zero and return
        # an all-black map instead.
        formatted = np.zeros(output.shape, dtype="uint8")

    return Image.fromarray(formatted)


title = "Demo: monocular depth estimation with DPT"
description = "This demo uses DPT to estimate depth from monocular image."

# Hidden files (names starting with ".") are skipped; sorting makes the
# example order deterministic, since os.listdir order is unspecified.
examples = [
    [f"examples/{file}"]
    for file in sorted(os.listdir("examples"))
    if not file.startswith(".")
]

demo = gr.Interface(
    fn=main,
    inputs=[
        gr.Image(label="Input Image"),
        gr.Slider(128, 512, value=384, label="Input Size"),
    ],
    outputs="image",
    title=title,
    description=description,
    examples=examples,
    cache_examples=True,
)

if __name__ == "__main__":
    # Only start the server when executed as a script, so importing this
    # module (e.g. to reuse ``main`` or ``demo``) does not launch it.
    demo.launch(debug=True, share=True)