Spaces:
Running
Running
Bachmann Roman Christian
committed on
Commit
·
758da21
1
Parent(s):
100478a
Changed number of tokens to percentages of tokens
Browse files
app.py
CHANGED
@@ -292,6 +292,11 @@ def plot_predictions(input_dict, preds, masks, image_size=224):
|
|
292 |
|
293 |
|
294 |
def inference(img, num_tokens, manual_mode, num_rgb, num_depth, num_semseg, seed):
|
|
|
|
|
|
|
|
|
|
|
295 |
im = Image.open(img)
|
296 |
|
297 |
# Center crop and resize RGB
|
@@ -367,7 +372,7 @@ title = "MultiMAE"
|
|
367 |
description = "Gradio demo for MultiMAE: Multi-modal Multi-task Masked Autoencoders. \
|
368 |
Upload your own images or try one of the examples below to explore the multi-modal masked reconstruction of a pre-trained MultiMAE model. \
|
369 |
Uploaded images are pseudo labeled using a DPT trained on Omnidata depth, and a Mask2Former trained on COCO. \
|
370 |
-
Choose the
|
371 |
|
372 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2204.01678' \
|
373 |
target='_blank'>MultiMAE: Multi-modal Multi-task Masked Autoencoders</a> | \
|
@@ -381,20 +386,20 @@ os.system("wget https://i.imgur.com/KTKgYKi.jpg")
|
|
381 |
os.system("wget https://i.imgur.com/lWYuRI7.jpg")
|
382 |
|
383 |
examples = [
|
384 |
-
['c9ObJdK.jpg',
|
385 |
-
['KTKgYKi.jpg',
|
386 |
-
['lWYuRI7.jpg',
|
387 |
]
|
388 |
|
389 |
gr.Interface(
|
390 |
fn=inference,
|
391 |
inputs=[
|
392 |
gr.inputs.Image(label='RGB input image', type='filepath'),
|
393 |
-
gr.inputs.Slider(label='
|
394 |
gr.inputs.Checkbox(label='Manual mode: Check this to manually set the number of input tokens per modality using the sliders below', default=False),
|
395 |
-
gr.inputs.Slider(label='
|
396 |
-
gr.inputs.Slider(label='
|
397 |
-
gr.inputs.Slider(label='
|
398 |
gr.inputs.Number(label='Random seed: Change this to sample different masks (for manual mode only)', default=0),
|
399 |
],
|
400 |
outputs=[
|
|
|
292 |
|
293 |
|
294 |
def inference(img, num_tokens, manual_mode, num_rgb, num_depth, num_semseg, seed):
|
295 |
+
num_tokens = int(588 * num_tokens / 100.0)
|
296 |
+
num_rgb = int(196 * num_rgb / 100.0)
|
297 |
+
num_depth = int(196 * num_depth / 100.0)
|
298 |
+
num_semseg = int(196 * num_semseg / 100.0)
|
299 |
+
|
300 |
im = Image.open(img)
|
301 |
|
302 |
# Center crop and resize RGB
|
|
|
372 |
description = "Gradio demo for MultiMAE: Multi-modal Multi-task Masked Autoencoders. \
|
373 |
Upload your own images or try one of the examples below to explore the multi-modal masked reconstruction of a pre-trained MultiMAE model. \
|
374 |
Uploaded images are pseudo labeled using a DPT trained on Omnidata depth, and a Mask2Former trained on COCO. \
|
375 |
+
Choose the percentage of visible tokens using the sliders below and see how MultiMAE reconstructs the modalities!"
|
376 |
|
377 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2204.01678' \
|
378 |
target='_blank'>MultiMAE: Multi-modal Multi-task Masked Autoencoders</a> | \
|
|
|
386 |
os.system("wget https://i.imgur.com/lWYuRI7.jpg")
|
387 |
|
388 |
examples = [
|
389 |
+
['c9ObJdK.jpg', 15, False, 15, 15, 15, 0],
|
390 |
+
['KTKgYKi.jpg', 15, False, 15, 15, 15, 0],
|
391 |
+
['lWYuRI7.jpg', 15, False, 15, 15, 15, 0],
|
392 |
]
|
393 |
|
394 |
gr.Interface(
|
395 |
fn=inference,
|
396 |
inputs=[
|
397 |
gr.inputs.Image(label='RGB input image', type='filepath'),
|
398 |
+
gr.inputs.Slider(label='Percentage of input tokens', default=15, step=0.1, minimum=0, maximum=100),
|
399 |
gr.inputs.Checkbox(label='Manual mode: Check this to manually set the number of input tokens per modality using the sliders below', default=False),
|
400 |
+
gr.inputs.Slider(label='Percentage of RGB input tokens (for manual mode only)', default=15, step=0.1, minimum=0, maximum=100),
|
401 |
+
gr.inputs.Slider(label='Percentage of depth input tokens (for manual mode only)', default=15, step=0.1, minimum=0, maximum=100),
|
402 |
+
gr.inputs.Slider(label='Percentage of semantic input tokens (for manual mode only)', default=15, step=0.1, minimum=0, maximum=100),
|
403 |
gr.inputs.Number(label='Random seed: Change this to sample different masks (for manual mode only)', default=0),
|
404 |
],
|
405 |
outputs=[
|