johnrobinsn's picture
Update app.py
08b9599 verified
import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
from pathlib import Path
from depth_viewer import depthviewer2html
import cv2
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
def process_image(image_path):
image_path = Path(image_path)
image = Image.open(image_path)
# if wider than 512 pixels let's resample to keep it performant on phones etc
if (image.size[0] > 512):
image = image.resize((512,int(512*image.size[1]/image.size[0])),Image.Resampling.LANCZOS)
# prepare image for the model
encoding = feature_extractor(image, return_tensors="pt")
# forward pass
with torch.no_grad():
outputs = model(**encoding)
predicted_depth = outputs.predicted_depth
# interpolate to original size
prediction = torch.nn.functional.interpolate(
predicted_depth.unsqueeze(1),
size=image.size[::-1],
mode="bicubic",
align_corners=False,
).squeeze()
output = prediction.cpu().numpy()
depth = (output * 255 / np.max(output)).astype('uint8')
h = depthviewer2html(image,depth)
return [h]
title = "3d Visualization of Depth Maps Generated using MiDaS"
description = "Improved 3D interactive depth viewer using Three.js embedded in a Gradio app. For more details see the <a href='https://colab.research.google.com/drive/1l2l8U7Vhq9RnvV2tHyfhrXKNuHfmb4IP?usp=sharing'>Colab Notebook.</a>"
examples = [["examples/owl1.jpg"],['examples/marsattacks.jpg'],['examples/kitten.jpg']]
iface = gr.Interface(fn=process_image,
inputs=[gr.Image(type="filepath",label="Input Image")],
outputs=[gr.HTML(label='Depth Viewer',elem_id='depth-viewer')],
title=title,
description=description,
examples=examples,
allow_flagging="never",
cache_examples=False,
css='#depth-viewer: {height:300px;}')
iface.launch(debug=True)