Spaces:

yejunliang23
/

ShapLLM-Omni

Running on Zero

App Files Files Community

yejunliang23 committed on Jun 7

Commit

b57e1c8

verified ·

1 Parent(s): 9a346a6

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -12

app.py CHANGED Viewed

@@ -1,11 +1,12 @@
 import gradio as gr
 import os
-import os
 os.environ['SPCONV_ALGO'] = 'native'
 import spaces
 import warp as wp
 import subprocess
 import torch
 from threading import Thread
 from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor,TextIteratorStreamer,AutoTokenizer
 from qwen_vl_utils import process_vision_info
@@ -24,6 +25,8 @@ import open3d as o3d
 from huggingface_hub import hf_hub_download
 import numpy as np
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 def _remove_image_special(text):
     text = text.replace('<ref>', '').replace('</ref>', '')
@@ -66,7 +69,7 @@ def save_ply_from_array(verts):
     return tmpf.name
 @spaces.GPU(duration=120)
-def predict(_chatbot,task_history,viewer_voxel,viewer_mesh,task_new,seed,top_k,top_p,temperature):
     torch.manual_seed(seed)
     chat_query = _chatbot[-1][0]
     query = task_history[-1][0]
@@ -74,7 +77,7 @@ def predict(_chatbot,task_history,viewer_voxel,viewer_mesh,task_new,seed,top_k,t
     if len(chat_query) == 0:
         _chatbot.pop()
         task_history.pop()
-        return _chatbot,task_history,viewer_voxel,viewer_mesh,task_new
     print("User: " + _parse_text(query))
     history_cp = copy.deepcopy(task_history)
     full_response = ""
@@ -127,10 +130,10 @@ def predict(_chatbot,task_history,viewer_voxel,viewer_mesh,task_new,seed,top_k,t
                 new_text = f"mesh-start\n{new_text}\nmesh-end"
             full_response += new_text
             _chatbot[-1] = (_parse_text(chat_query), _parse_text(full_response))
-            yield _chatbot,viewer_voxel,viewer_mesh,task_new
     task_history[-1] = (chat_query, full_response)
-    yield _chatbot,viewer_voxel,viewer_mesh,task_new
     if encoding_indices is not None:
         print("processing mesh...")
@@ -140,7 +143,7 @@ def predict(_chatbot,task_history,viewer_voxel,viewer_mesh,task_new,seed,top_k,t
         indices       = torch.nonzero(z_s[0] == 1)
         position_recon= (indices.float() + 0.5) / 64 - 0.5
         fig = make_pointcloud_figure(position_recon)
-        yield _chatbot,fig,viewer_mesh,task_new
         position=position_recon
         coords        = ((position + 0.5) * 64).int().contiguous()
@@ -158,6 +161,14 @@ def predict(_chatbot,task_history,viewer_voxel,viewer_mesh,task_new,seed,top_k,t
                     cond    = pipeline_text.get_cond([prompt])
                     slat    = pipeline_text.sample_slat(cond, coords)
                     outputs = pipeline_text.decode_slat(slat, ['mesh', 'gaussian'])
                 glb = postprocessing_utils.to_glb(
                     outputs['gaussian'][0],
@@ -168,7 +179,7 @@ def predict(_chatbot,task_history,viewer_voxel,viewer_mesh,task_new,seed,top_k,t
                 )
                 glb.export(f"temper.glb")
                 print("processing mesh over...")
-                yield _chatbot,fig,"temper.glb",task_new
             else:
                 # image to 3d
                 with torch.no_grad():
@@ -176,6 +187,15 @@ def predict(_chatbot,task_history,viewer_voxel,viewer_mesh,task_new,seed,top_k,t
                     cond    = pipeline_image.get_cond([img])
                     slat    = pipeline_image.sample_slat(cond, coords)
                     outputs = pipeline_image.decode_slat(slat, ['mesh', 'gaussian'])
                 glb = postprocessing_utils.to_glb(
                     outputs['gaussian'][0],
                     outputs['mesh'][0],
@@ -185,10 +205,10 @@ def predict(_chatbot,task_history,viewer_voxel,viewer_mesh,task_new,seed,top_k,t
                 )
                 glb.export(f"temper.glb")
                 print("processing mesh over...")
-                yield _chatbot,fig,"temper.glb",task_new
         except:
             print("processing mesh...bug")
-            yield _chatbot,fig,viewer_mesh,task_new
 def regenerate(_chatbot, task_history):
     if not task_history:
@@ -459,7 +479,9 @@ with gr.Blocks() as demo:
             task_new     = gr.State([])
         with gr.Column():
             viewer_plot  = gr.Plot(label="Voxel Visual",scale=0.5)
-            viewer_mesh  = gr.Model3D(label="Mesh Visual", height=200,scale=1.0)
             examples_text = gr.Examples(
                 examples=[
@@ -497,8 +519,8 @@ with gr.Blocks() as demo:
     submit_btn.click(add_text, [chatbot, task_history, query,task_new],\
                                [chatbot, task_history,task_new]).then(
-        predict, [chatbot, task_history,viewer_plot,viewer_mesh,task_new,seed,top_k,top_p,temperature],\
-                 [chatbot,viewer_plot,viewer_mesh,task_new], show_progress=True
     )
     submit_btn.click(reset_user_input, [], [query])
     empty_bin.click(reset_state, [task_history], [chatbot], show_progress=True)

 import gradio as gr
 import os
 os.environ['SPCONV_ALGO'] = 'native'
 import spaces
+from gradio_litmodel3d import LitModel3D
 import warp as wp
 import subprocess
 import torch
+import uuid
 from threading import Thread
 from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor,TextIteratorStreamer,AutoTokenizer
 from qwen_vl_utils import process_vision_info
 from huggingface_hub import hf_hub_download
 import numpy as np
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+TMP_DIR = "/tmp/Trellis-demo"
+os.makedirs(TMP_DIR, exist_ok=True)
 def _remove_image_special(text):
     text = text.replace('<ref>', '').replace('</ref>', '')
     return tmpf.name
 @spaces.GPU(duration=120)
+def predict(_chatbot,task_history,viewer_voxel,viewer_mesh,task_new,seed,top_k,top_p,temperature,video_path):
     torch.manual_seed(seed)
     chat_query = _chatbot[-1][0]
     query = task_history[-1][0]
     if len(chat_query) == 0:
         _chatbot.pop()
         task_history.pop()
+        return _chatbot,task_history,viewer_voxel,viewer_mesh,task_new,video_path
     print("User: " + _parse_text(query))
     history_cp = copy.deepcopy(task_history)
     full_response = ""
                 new_text = f"mesh-start\n{new_text}\nmesh-end"
             full_response += new_text
             _chatbot[-1] = (_parse_text(chat_query), _parse_text(full_response))
+            yield _chatbot,viewer_voxel,viewer_mesh,task_new,video_path
     task_history[-1] = (chat_query, full_response)
+    yield _chatbot,viewer_voxel,viewer_mesh,task_new,video_path
     if encoding_indices is not None:
         print("processing mesh...")
         indices       = torch.nonzero(z_s[0] == 1)
         position_recon= (indices.float() + 0.5) / 64 - 0.5
         fig = make_pointcloud_figure(position_recon)
+        yield _chatbot,fig,viewer_mesh,task_new,video_path
         position=position_recon
         coords        = ((position + 0.5) * 64).int().contiguous()
                     cond    = pipeline_text.get_cond([prompt])
                     slat    = pipeline_text.sample_slat(cond, coords)
                     outputs = pipeline_text.decode_slat(slat, ['mesh', 'gaussian'])
+                video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+                video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+                video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+                trial_id = uuid.uuid4()
+                video_path = f"{TMP_DIR}/{trial_id}.mp4"
+                os.makedirs(os.path.dirname(video_path), exist_ok=True)
+                imageio.mimsave(video_path, video, fps=15)
                 glb = postprocessing_utils.to_glb(
                     outputs['gaussian'][0],
                 )
                 glb.export(f"temper.glb")
                 print("processing mesh over...")
+                yield _chatbot,fig,"temper.glb",task_new,video_path
             else:
                 # image to 3d
                 with torch.no_grad():
                     cond    = pipeline_image.get_cond([img])
                     slat    = pipeline_image.sample_slat(cond, coords)
                     outputs = pipeline_image.decode_slat(slat, ['mesh', 'gaussian'])
+                video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+                video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+                video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+                trial_id = uuid.uuid4()
+                video_path = f"{TMP_DIR}/{trial_id}.mp4"
+                os.makedirs(os.path.dirname(video_path), exist_ok=True)
+                imageio.mimsave(video_path, video, fps=15)
                 glb = postprocessing_utils.to_glb(
                     outputs['gaussian'][0],
                     outputs['mesh'][0],
                 )
                 glb.export(f"temper.glb")
                 print("processing mesh over...")
+                yield _chatbot,fig,"temper.glb",task_new,video_path,video_path
         except:
             print("processing mesh...bug")
+            yield _chatbot,fig,viewer_mesh,task_new,video_path
 def regenerate(_chatbot, task_history):
     if not task_history:
             task_new     = gr.State([])
         with gr.Column():
             viewer_plot  = gr.Plot(label="Voxel Visual",scale=0.5)
+            #viewer_mesh  = gr.Model3D(label="Mesh Visual", height=200,scale=1.0)
+            video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
+            viewer_mesh  = LitModel3D(label="Extracted GLB", exposure=20.0, height=300)
             examples_text = gr.Examples(
                 examples=[
     submit_btn.click(add_text, [chatbot, task_history, query,task_new],\
                                [chatbot, task_history,task_new]).then(
+        predict, [chatbot, task_history,viewer_plot,viewer_mesh,task_new,seed,top_k,top_p,temperature,video_output],\
+                 [chatbot,viewer_plot,viewer_mesh,task_new,video_output], show_progress=True
     )
     submit_btn.click(reset_user_input, [], [query])
     empty_bin.click(reset_state, [task_history], [chatbot], show_progress=True)