import gradio as gr
import os

from pq3d.inference import inference
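
# Expected pq3d API, inferred from how it is called in inference_wrapper below
# (not taken from upstream docs): inference(scan_id, text) -> (inst_id, response),
# where inst_id selects a pre-exported mask mesh under assets/mask/<scan_id>/.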

MESH_DIR = 'assets/mesh'
MESH_NAMES = sorted([os.path.splitext(fname)[0] for fname in os.listdir(MESH_DIR)])
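
# Fail fast if the bundled meshes are missing. This guard is an addition, on the
# assumption that the demo ships its .glb scene files under assets/mesh.
if not MESH_NAMES:
    raise FileNotFoundError(f'no .glb scene meshes found under {MESH_DIR}')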


def change_scene(dropdown_scene: str):
    # Return the mesh path of the selected scene and echo the scan id so the
    # scan_id textbox stays in sync with the viewer.
    return os.path.join(MESH_DIR, f'{dropdown_scene}.glb'), dropdown_scene


with gr.Blocks(title='PQ3D Demo') as demo:
    gr.HTML(value="<h1 align='center'>Unifying 3D Vision-Language Understanding via Promptable Queries</h1>")

    with gr.Row():
        with gr.Column(scale=5):
            dropdown_scene = gr.Dropdown(
                choices=MESH_NAMES,
                value='scene0050_00',
                interactive=True,
                label='Select a 3D scene',
            )
            model_3d = gr.Model3D(
                value=os.path.join(MESH_DIR, 'scene0050_00.glb'),
                clear_color=[0.0, 0.0, 0.0, 0.0],
                label='3D Scene',
                camera_position=(80, 100, 6),
                height=659,
            )
            gr.HTML(
                """<center><strong>
                SCROLL and DRAG on the 3D Scene
                to zoom in/out and rotate. Press CTRL and DRAG to pan.
                </strong></center>
                """
            )
            scan_id = gr.Text("scene0050_00", label='scan_id')

    # Swap the displayed mesh and keep scan_id in sync whenever a new scene is
    # chosen; queue=False lets this lightweight update bypass the request queue.
    dropdown_scene.change(
        fn=change_scene,
        inputs=[dropdown_scene],
        outputs=[model_3d, scan_id],
        queue=False
    )

    def inference_wrapper(text, scan_id):
        # Run PQ3D on the text query, then point the viewer at the
        # pre-exported mask mesh of the predicted instance.
        inst_id, response = inference(scan_id, text)
        return f"assets/mask/{scan_id}/{scan_id}_obj_{inst_id}.glb", response

    gr.Interface(
        fn=inference_wrapper,
        inputs=["text", scan_id],
        outputs=[
            gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], camera_position=(80, 100, 6), label="3D Model"),
            "text",
        ],
        examples=[
            ["armchair", "scene0050_00"],
            ["Sofa", "scene0050_00"],
            ["left computer on the desk", "scene0050_00"],
        ],
        title="Input a text query; the output 3D mask marks the predicted object in red",
    )
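
# share=True publishes a temporary public Gradio link; allowed_paths=['assets']
# whitelists the assets directory so the frontend can serve the .glb files.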
demo.queue().launch(share=True, allowed_paths=['assets'])