Update app.py
app.py CHANGED
@@ -3,10 +3,7 @@ import torch
 import numpy as np
 from transformers import DPTForDepthEstimation, DPTImageProcessor
 import gradio as gr
-import torch.quantization
 import torch.nn.utils.prune as prune
-import asyncio
-import queue
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
@@ -20,16 +17,17 @@ parameters_to_prune = [
 prune.global_unstructured(
     parameters_to_prune,
     pruning_method=prune.L1Unstructured,
-    amount=0.
+    amount=0.4,  # Prune 40% of weights
 )
 
 for module, _ in parameters_to_prune:
     prune.remove(module, "weight")
 
-# Apply quantization after pruning
 model = torch.quantization.quantize_dynamic(
     model, {torch.nn.Linear, torch.nn.Conv2d}, dtype=torch.qint8
-)
+)
+
+model = model.to(device)
 
 processor = DPTImageProcessor.from_pretrained("Intel/dpt-swinv2-tiny-256")
 
@@ -37,11 +35,8 @@ color_map = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_INFER
 
 input_tensor = torch.zeros((1, 3, 128, 128), dtype=torch.float32, device=device)
 
-frame_queue = queue.Queue(maxsize=1)
-result_queue = queue.Queue(maxsize=1)
-
 def preprocess_image(image):
-    return cv2.resize(image, (128,
+    return cv2.resize(image, (128, 128), interpolation=cv2.INTER_AREA).transpose(2, 0, 1).astype(np.float32) / 255.0
 
 @torch.inference_mode()
 def process_frame(image):
@@ -52,55 +47,17 @@ def process_frame(image):
 
     predicted_depth = model(input_tensor).predicted_depth
     depth_map = predicted_depth.squeeze().cpu().numpy()
-    num_bins = 1000
-    depth_map = np.digitize(depth_map, bins=np.linspace(depth_map.min(), depth_map.max(), num_bins)) - 1
     depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
     depth_map = (depth_map * 255).astype(np.uint8)
     depth_map_colored = cv2.applyColorMap(depth_map, cv2.COLORMAP_INFERNO)
 
     return cv2.cvtColor(depth_map_colored, cv2.COLOR_BGR2RGB)
+
+interface = gr.Interface(
+    fn=process_frame,
+    inputs=gr.Image(source="webcam", streaming=True),
+    outputs="image",
+    live=True
+)
 
-
-    frame_count = 0
-    while True:
-        ret, frame = webcam.read()
-        if not ret:
-            break
-        frame_count += 1
-        if frame_count % 5 == 0:  # Process every 5th frame
-            if frame_queue.full():
-                frame_queue.get()  # Remove old frame if queue is full
-            frame_queue.put(frame)
-        await asyncio.sleep(0.01)  # Small delay to prevent blocking
-
-async def process_frames():
-    while True:
-        if not frame_queue.empty():
-            frame = frame_queue.get()
-            result = process_frame(frame)
-            if result_queue.full():
-                result_queue.get()  # Remove old result if queue is full
-            result_queue.put(result)
-        await asyncio.sleep(0.01)  # Small delay to prevent blocking
-
-def get_latest_frame():
-    if result_queue.empty():
-        return None
-    return result_queue.get()
-
-async def main():
-    webcam = cv2.VideoCapture(0)
-    asyncio.create_task(capture_frames(webcam))
-    asyncio.create_task(process_frames())
-
-    interface = gr.Interface(
-        fn=get_latest_frame,
-        inputs=None,
-        outputs="image",
-        live=True
-    )
-
-    await interface.launch()
-
-if __name__ == "__main__":
-    asyncio.run(main())
+interface.launch()
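The substance of the commit is the compression block: the pruning amount becomes a real 40% and the dynamic-quantization call is kept, now followed by an explicit move to the target device. A self-contained sketch of that prune-then-quantize recipe, using a toy Sequential model in place of the DPT model (the model and layer sizes are illustrative, not from app.py):

import torch
import torch.nn as nn
import torch.nn.utils.prune as prune

# Stand-in for the DPT depth model (illustrative only).
model = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 10))

parameters_to_prune = [
    (m, "weight") for m in model.modules() if isinstance(m, nn.Linear)
]

# Zero the 40% of weights with the smallest L1 magnitude, ranked
# globally across all listed tensors rather than 40% per layer.
prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.4,
)

# Make the pruning permanent: fold each mask into its weight tensor so
# the model is a plain nn.Module again before quantization.
for module, _ in parameters_to_prune:
    prune.remove(module, "weight")

# Dynamic quantization swaps supported layers for int8 kernels. Only
# certain module types (nn.Linear and the RNN family) are converted;
# the nn.Conv2d entry in app.py's set is effectively ignored.
model = torch.quantization.quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)

One caveat worth testing: dynamically quantized modules ship CPU kernels, so the subsequent model = model.to(device) is only safe when device resolves to "cpu"; on CUDA the quantized layers may fail at inference time.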
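The rewritten preprocess_image now does the whole front end in one expression: resize to the model's 128x128 input, reorder HWC to CHW, and scale uint8 pixels to floats in [0, 1]. A sketch of how a frame would flow into the preallocated input_tensor (the copy into the buffer is an assumption; that part of process_frame is outside the diff):

import cv2
import numpy as np
import torch

def preprocess_image(image):
    # Resize to 128x128, reorder HWC -> CHW, scale [0, 255] -> [0.0, 1.0].
    return cv2.resize(image, (128, 128), interpolation=cv2.INTER_AREA).transpose(2, 0, 1).astype(np.float32) / 255.0

input_tensor = torch.zeros((1, 3, 128, 128), dtype=torch.float32)

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in webcam frame
input_tensor[0] = torch.from_numpy(preprocess_image(frame))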
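The larger change is structural: the old asyncio/queue plumbing (capture_frames, process_frames, the frame and result queues) is deleted, and Gradio itself drives the loop; with live=True and a streaming webcam input, process_frame is invoked on each new frame. A minimal standalone version of that wiring, with an identity transform in place of depth estimation (note that gr.Image(source="webcam", streaming=True) is the Gradio 3.x signature; Gradio 4.x renamed source to sources):

import gradio as gr
import numpy as np

def process_frame(image):
    # Stand-in for the depth pipeline: echo the frame back unchanged.
    return image

interface = gr.Interface(
    fn=process_frame,
    inputs=gr.Image(source="webcam", streaming=True),  # Gradio 3.x API
    outputs="image",
    live=True,
)

interface.launch()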