Jie Hu committed on
Commit
6522999
·
1 Parent(s): d17054d

init project

Browse files
Files changed (1) hide show
  1. app.py +118 -118
app.py CHANGED
@@ -44,7 +44,7 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
44
  # pe3r = Models(device)
45
  MAST3R_CKP = 'naver/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric'
46
  mast3r = AsymmetricMASt3R.from_pretrained(MAST3R_CKP).to(device)
47
-
48
 
49
 
50
  def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
@@ -114,138 +114,138 @@ def get_3D_model_from_scene(outdir, scene, min_conf_thr=3, as_pointcloud=False,
114
  return _convert_scene_output_to_glb(outdir, rgbimg, pts3d, msk, focals, cams2world, as_pointcloud=as_pointcloud,
115
  transparent_cams=transparent_cams, cam_size=cam_size)
116
 
117
- def mask_nms(masks, threshold=0.8):
118
- keep = []
119
- mask_num = len(masks)
120
- suppressed = np.zeros((mask_num), dtype=np.int64)
121
- for i in range(mask_num):
122
- if suppressed[i] == 1:
123
- continue
124
- keep.append(i)
125
- for j in range(i + 1, mask_num):
126
- if suppressed[j] == 1:
127
- continue
128
- intersection = (masks[i] & masks[j]).sum()
129
- if min(intersection / masks[i].sum(), intersection / masks[j].sum()) > threshold:
130
- suppressed[j] = 1
131
- return keep
132
-
133
- def filter(masks, keep):
134
- ret = []
135
- for i, m in enumerate(masks):
136
- if i in keep: ret.append(m)
137
- return ret
138
-
139
- def mask_to_box(mask):
140
- if mask.sum() == 0:
141
- return np.array([0, 0, 0, 0])
142
 
143
- # Get the rows and columns where the mask is 1
144
- rows = np.any(mask, axis=1)
145
- cols = np.any(mask, axis=0)
146
 
147
- # Get top, bottom, left, right edges
148
- top = np.argmax(rows)
149
- bottom = len(rows) - 1 - np.argmax(np.flip(rows))
150
- left = np.argmax(cols)
151
- right = len(cols) - 1 - np.argmax(np.flip(cols))
152
 
153
- return np.array([left, top, right, bottom])
154
-
155
- def box_xyxy_to_xywh(box_xyxy):
156
- box_xywh = deepcopy(box_xyxy)
157
- box_xywh[2] = box_xywh[2] - box_xywh[0]
158
- box_xywh[3] = box_xywh[3] - box_xywh[1]
159
- return box_xywh
160
-
161
- def get_seg_img(mask, box, image):
162
- image = image.copy()
163
- x, y, w, h = box
164
- # image[mask == 0] = np.array([0, 0, 0], dtype=np.uint8)
165
- box_area = w * h
166
- mask_area = mask.sum()
167
- if 1 - (mask_area / box_area) < 0.2:
168
- image[mask == 0] = np.array([0, 0, 0], dtype=np.uint8)
169
- else:
170
- random_values = np.random.randint(0, 255, size=image.shape, dtype=np.uint8)
171
- image[mask == 0] = random_values[mask == 0]
172
- seg_img = image[y:y+h, x:x+w, ...]
173
- return seg_img
174
-
175
- def pad_img(img):
176
- h, w, _ = img.shape
177
- l = max(w,h)
178
- pad = np.zeros((l,l,3), dtype=np.uint8) #
179
- if h > w:
180
- pad[:,(h-w)//2:(h-w)//2 + w, :] = img
181
- else:
182
- pad[(w-h)//2:(w-h)//2 + h, :, :] = img
183
- return pad
184
-
185
- def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
186
- assert len(args) > 0 and all(
187
- len(a) == len(args[0]) for a in args
188
- ), "Batched iteration must have inputs of all the same size."
189
- n_batches = len(args[0]) // batch_size + int(len(args[0]) % batch_size != 0)
190
- for b in range(n_batches):
191
- yield [arg[b * batch_size : (b + 1) * batch_size] for arg in args]
192
-
193
- def slerp(u1, u2, t):
194
- """
195
- Perform spherical linear interpolation (Slerp) between two unit vectors.
196
 
197
- Args:
198
- - u1 (torch.Tensor): First unit vector, shape (1024,)
199
- - u2 (torch.Tensor): Second unit vector, shape (1024,)
200
- - t (float): Interpolation parameter
201
 
202
- Returns:
203
- - torch.Tensor: Interpolated vector, shape (1024,)
204
- """
205
- # Compute the dot product
206
- dot_product = torch.sum(u1 * u2)
207
 
208
- # Ensure the dot product is within the valid range [-1, 1]
209
- dot_product = torch.clamp(dot_product, -1.0, 1.0)
210
 
211
- # Compute the angle between the vectors
212
- theta = torch.acos(dot_product)
213
 
214
- # Compute the coefficients for the interpolation
215
- sin_theta = torch.sin(theta)
216
- if sin_theta == 0:
217
- # Vectors are parallel, return a linear interpolation
218
- return u1 + t * (u2 - u1)
219
 
220
- s1 = torch.sin((1 - t) * theta) / sin_theta
221
- s2 = torch.sin(t * theta) / sin_theta
222
 
223
- # Perform the interpolation
224
- return s1 * u1 + s2 * u2
225
 
226
- def slerp_multiple(vectors, t_values):
227
- """
228
- Perform spherical linear interpolation (Slerp) for multiple vectors.
229
 
230
- Args:
231
- - vectors (torch.Tensor): Tensor of vectors, shape (n, 1024)
232
- - t_values (torch.Tensor): Tensor of values corresponding to each vector, shape (n,)
233
 
234
- Returns:
235
- - torch.Tensor: Interpolated vector, shape (1024,)
236
- """
237
- n = vectors.shape[0]
238
 
239
- # Initialize the interpolated vector with the first vector
240
- interpolated_vector = vectors[0]
241
 
242
- # Perform Slerp iteratively
243
- for i in range(1, n):
244
- # Perform Slerp between the current interpolated vector and the next vector
245
- t = t_values[i] / (t_values[i] + t_values[i-1])
246
- interpolated_vector = slerp(interpolated_vector, vectors[i], t)
247
 
248
- return interpolated_vector
249
 
250
  # @torch.no_grad
251
  # def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
@@ -438,7 +438,7 @@ def slerp_multiple(vectors, t_values):
438
 
439
  # return cog_seg_maps, rev_cog_seg_maps, multi_view_clip_feats
440
 
441
- @spaces.GPU(duration=180)
442
  def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
443
  as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size,
444
  scenegraph_type, winsize, refid):
 
44
  # pe3r = Models(device)
45
  MAST3R_CKP = 'naver/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric'
46
  mast3r = AsymmetricMASt3R.from_pretrained(MAST3R_CKP).to(device)
47
+ print(device)
48
 
49
 
50
  def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
 
114
  return _convert_scene_output_to_glb(outdir, rgbimg, pts3d, msk, focals, cams2world, as_pointcloud=as_pointcloud,
115
  transparent_cams=transparent_cams, cam_size=cam_size)
116
 
117
+ # def mask_nms(masks, threshold=0.8):
118
+ # keep = []
119
+ # mask_num = len(masks)
120
+ # suppressed = np.zeros((mask_num), dtype=np.int64)
121
+ # for i in range(mask_num):
122
+ # if suppressed[i] == 1:
123
+ # continue
124
+ # keep.append(i)
125
+ # for j in range(i + 1, mask_num):
126
+ # if suppressed[j] == 1:
127
+ # continue
128
+ # intersection = (masks[i] & masks[j]).sum()
129
+ # if min(intersection / masks[i].sum(), intersection / masks[j].sum()) > threshold:
130
+ # suppressed[j] = 1
131
+ # return keep
132
+
133
+ # def filter(masks, keep):
134
+ # ret = []
135
+ # for i, m in enumerate(masks):
136
+ # if i in keep: ret.append(m)
137
+ # return ret
138
+
139
+ # def mask_to_box(mask):
140
+ # if mask.sum() == 0:
141
+ # return np.array([0, 0, 0, 0])
142
 
143
+ # # Get the rows and columns where the mask is 1
144
+ # rows = np.any(mask, axis=1)
145
+ # cols = np.any(mask, axis=0)
146
 
147
+ # # Get top, bottom, left, right edges
148
+ # top = np.argmax(rows)
149
+ # bottom = len(rows) - 1 - np.argmax(np.flip(rows))
150
+ # left = np.argmax(cols)
151
+ # right = len(cols) - 1 - np.argmax(np.flip(cols))
152
 
153
+ # return np.array([left, top, right, bottom])
154
+
155
+ # def box_xyxy_to_xywh(box_xyxy):
156
+ # box_xywh = deepcopy(box_xyxy)
157
+ # box_xywh[2] = box_xywh[2] - box_xywh[0]
158
+ # box_xywh[3] = box_xywh[3] - box_xywh[1]
159
+ # return box_xywh
160
+
161
+ # def get_seg_img(mask, box, image):
162
+ # image = image.copy()
163
+ # x, y, w, h = box
164
+ # # image[mask == 0] = np.array([0, 0, 0], dtype=np.uint8)
165
+ # box_area = w * h
166
+ # mask_area = mask.sum()
167
+ # if 1 - (mask_area / box_area) < 0.2:
168
+ # image[mask == 0] = np.array([0, 0, 0], dtype=np.uint8)
169
+ # else:
170
+ # random_values = np.random.randint(0, 255, size=image.shape, dtype=np.uint8)
171
+ # image[mask == 0] = random_values[mask == 0]
172
+ # seg_img = image[y:y+h, x:x+w, ...]
173
+ # return seg_img
174
+
175
+ # def pad_img(img):
176
+ # h, w, _ = img.shape
177
+ # l = max(w,h)
178
+ # pad = np.zeros((l,l,3), dtype=np.uint8) #
179
+ # if h > w:
180
+ # pad[:,(h-w)//2:(h-w)//2 + w, :] = img
181
+ # else:
182
+ # pad[(w-h)//2:(w-h)//2 + h, :, :] = img
183
+ # return pad
184
+
185
+ # def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
186
+ # assert len(args) > 0 and all(
187
+ # len(a) == len(args[0]) for a in args
188
+ # ), "Batched iteration must have inputs of all the same size."
189
+ # n_batches = len(args[0]) // batch_size + int(len(args[0]) % batch_size != 0)
190
+ # for b in range(n_batches):
191
+ # yield [arg[b * batch_size : (b + 1) * batch_size] for arg in args]
192
+
193
+ # def slerp(u1, u2, t):
194
+ # """
195
+ # Perform spherical linear interpolation (Slerp) between two unit vectors.
196
 
197
+ # Args:
198
+ # - u1 (torch.Tensor): First unit vector, shape (1024,)
199
+ # - u2 (torch.Tensor): Second unit vector, shape (1024,)
200
+ # - t (float): Interpolation parameter
201
 
202
+ # Returns:
203
+ # - torch.Tensor: Interpolated vector, shape (1024,)
204
+ # """
205
+ # # Compute the dot product
206
+ # dot_product = torch.sum(u1 * u2)
207
 
208
+ # # Ensure the dot product is within the valid range [-1, 1]
209
+ # dot_product = torch.clamp(dot_product, -1.0, 1.0)
210
 
211
+ # # Compute the angle between the vectors
212
+ # theta = torch.acos(dot_product)
213
 
214
+ # # Compute the coefficients for the interpolation
215
+ # sin_theta = torch.sin(theta)
216
+ # if sin_theta == 0:
217
+ # # Vectors are parallel, return a linear interpolation
218
+ # return u1 + t * (u2 - u1)
219
 
220
+ # s1 = torch.sin((1 - t) * theta) / sin_theta
221
+ # s2 = torch.sin(t * theta) / sin_theta
222
 
223
+ # # Perform the interpolation
224
+ # return s1 * u1 + s2 * u2
225
 
226
+ # def slerp_multiple(vectors, t_values):
227
+ # """
228
+ # Perform spherical linear interpolation (Slerp) for multiple vectors.
229
 
230
+ # Args:
231
+ # - vectors (torch.Tensor): Tensor of vectors, shape (n, 1024)
232
+ # - t_values (torch.Tensor): Tensor of values corresponding to each vector, shape (n,)
233
 
234
+ # Returns:
235
+ # - torch.Tensor: Interpolated vector, shape (1024,)
236
+ # """
237
+ # n = vectors.shape[0]
238
 
239
+ # # Initialize the interpolated vector with the first vector
240
+ # interpolated_vector = vectors[0]
241
 
242
+ # # Perform Slerp iteratively
243
+ # for i in range(1, n):
244
+ # # Perform Slerp between the current interpolated vector and the next vector
245
+ # t = t_values[i] / (t_values[i] + t_values[i-1])
246
+ # interpolated_vector = slerp(interpolated_vector, vectors[i], t)
247
 
248
+ # return interpolated_vector
249
 
250
  # @torch.no_grad
251
  # def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
 
438
 
439
  # return cog_seg_maps, rev_cog_seg_maps, multi_view_clip_feats
440
 
441
+ @spaces.GPU(duration=120)
442
  def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
443
  as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size,
444
  scenegraph_type, winsize, refid):