huntrezz committed on
Commit
9771925
·
verified ·
1 Parent(s): e320b6c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import torch
3
+ from transformers import DPTForDepthEstimation, DPTImageProcessor
4
+ import numpy as np
5
+ import time
6
+ import warnings
7
# Suppress warnings whose message starts with the rescale notice below.
warnings.filterwarnings("ignore", message="It looks like you are trying to rescale already rescaled images.")

# Checkpoint identifier shared by the model weights and the image processor.
MODEL_NAME = "Intel/dpt-swinv2-tiny-256"

# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Weights are requested in half precision and then moved to the chosen device.
model = DPTForDepthEstimation.from_pretrained(MODEL_NAME, torch_dtype=torch.float16)
model = model.to(device)
processor = DPTImageProcessor.from_pretrained(MODEL_NAME)

# Open capture device index 0; frames are read from it in the main loop.
cap = cv2.VideoCapture(0)
14
+
15
def resize_image(image, target_size=(256, 256)):
    """Return ``image`` resized to ``target_size`` via ``cv2.resize``.

    Args:
        image: Image array accepted by ``cv2.resize``.
        target_size: Size tuple forwarded unchanged as the ``dsize`` argument
            of ``cv2.resize``; defaults to ``(256, 256)``.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    resized = cv2.resize(image, target_size)
    return resized
17
+
18
+
19
+
20
def manual_normalize(depth_map):
    """Min-max scale ``depth_map`` into the 0-255 range as ``uint8``.

    Args:
        depth_map: Array-like of numeric values.

    Returns:
        A ``uint8`` array with the same shape as ``depth_map``. The minimum
        maps to 0 and the maximum to 255; intermediate values are truncated
        (not rounded) by the integer cast. An empty or constant input yields
        an all-zero array of the same shape.
    """
    values = np.asarray(depth_map)

    # Nothing to scale; np.min/np.max would raise ValueError on an empty array.
    if values.size == 0:
        return np.zeros(values.shape, dtype=np.uint8)

    # Integer (and bool) inputs are promoted so that `max - min` and
    # `values - min` cannot wrap around in narrow dtypes such as int8.
    # Floating inputs keep their dtype so results match the previous behavior.
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype(np.float64)

    # NaN/inf would poison min/max (NaN != NaN is always True) and produce
    # undefined uint8 values; replace them with 0 before scaling.
    values = np.nan_to_num(values, nan=0.0, posinf=0.0, neginf=0.0)

    min_val = np.min(values)
    max_val = np.max(values)
    if min_val == max_val:
        # Constant input: there is no range to stretch.
        return np.zeros(values.shape, dtype=np.uint8)

    normalized = (values - min_val) / (max_val - min_val)
    return (normalized * 255).astype(np.uint8)
28
+
29
# Only every `frame_skip`-th captured frame is run through the model and shown.
frame_skip = 4
frame_count = 0
# 256-entry INFERNO lookup table, passed to cv2.applyColorMap as a user colormap.
color_map = cv2.applyColorMap(np.arange(256, dtype=np.uint8), cv2.COLORMAP_INFERNO)

# Timestamp of the previously displayed frame; drives the FPS overlay.
prev_frame_time = 0

# try/finally guarantees the capture device and windows are released even if
# the loop raises (including KeyboardInterrupt).
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        if frame_count % frame_skip != 0:
            continue

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized_frame = resize_image(rgb_frame)

        inputs = processor(images=resized_frame, return_tensors="pt").to(device)
        # Follow the model's own parameter dtype instead of hard-coding float16,
        # so the inputs always match however the model was loaded.
        inputs = {k: v.to(model.dtype) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)
            predicted_depth = outputs.predicted_depth

        depth_map = predicted_depth.squeeze().cpu().numpy()

        # Check for zero-sized arrays first: np.min/np.max below would raise
        # on an empty array before the guard could ever run.
        if depth_map.size == 0:
            print("Error: depth_map is empty")
            depth_map = np.zeros((256, 256), dtype=np.uint8)
        else:
            # Check Input Data
            print(f"depth_map shape: {depth_map.shape}")
            print(f"depth_map min: {np.min(depth_map)}, max: {np.max(depth_map)}")
            print(f"depth_map dtype: {depth_map.dtype}")

            # Handle invalid values
            depth_map = np.nan_to_num(depth_map, nan=0.0, posinf=0.0, neginf=0.0)

            # Ensure depth_map is in float32 format
            depth_map = depth_map.astype(np.float32)

            if np.min(depth_map) != np.max(depth_map):
                depth_u8 = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U)
                # Fallback: re-normalize the *float* map manually. Running the
                # fallback on the already-zeroed uint8 result could only ever
                # return zeros again.
                if np.all(depth_u8 == 0):
                    depth_u8 = manual_normalize(depth_map)
            else:
                # Constant map: there is no range to stretch.
                depth_u8 = np.zeros_like(depth_map, dtype=np.uint8)
            depth_map = depth_u8

        depth_map_colored = cv2.applyColorMap(depth_map, color_map)
        # Scale the colored map to the camera frame's size so both can be stacked.
        depth_map_colored = cv2.resize(depth_map_colored, (frame.shape[1], frame.shape[0]))

        combined = np.hstack((frame, depth_map_colored))

        new_frame_time = time.time()
        elapsed = new_frame_time - prev_frame_time
        # Guard against a zero interval on clocks with coarse resolution.
        fps = 1 / elapsed if elapsed > 0 else 0
        prev_frame_time = new_frame_time
        cv2.putText(combined, f"FPS: {int(fps)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2.imshow('Webcam and Depth Map', combined)

        # Pressing 'q' in the display window ends the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()