Spaces:
Sleeping
Sleeping
Init space
Browse files
app.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
from PIL import Image
|
4 |
+
from torchvision import transforms
|
5 |
+
import numpy as np
|
6 |
+
from matplotlib import pyplot as plt
|
7 |
+
from torch.utils.data import Dataset, DataLoader
|
8 |
+
import evaluate
|
9 |
+
from torch import nn
|
10 |
+
from transformers import SegformerForSemanticSegmentation
|
11 |
+
import sys
|
12 |
+
import io
|
13 |
+
|
14 |
+
|
15 |
+
###################
|
16 |
+
# Setup label names
|
17 |
+
# Class names of the dacl10k labels. The position of a name in this list is
# the class index used by the label maps below.
target_list = [
    'Crack', 'ACrack', 'Wetspot', 'Efflorescence', 'Rust', 'Rockpocket',
    'Hollowareas', 'Cavity', 'Spalling', 'Graffiti', 'Weathering',
    'Restformwork', 'ExposedRebars', 'Bearing', 'EJoint', 'Drainage',
    'PEquipment', 'JTape', 'WConccor',
]
classes = target_list
nclasses = len(classes)

# Name -> index and index -> name lookups, both in list order.
label2id = {name: index for index, name in enumerate(classes)}
id2label = {index: name for index, name in enumerate(classes)}
|
23 |
+
|
24 |
+
############
|
25 |
+
# Load model
|
26 |
+
# CPU device handle.
device = torch.device('cpu')

# SegFormer built from the "nvidia/mit-b1" checkpoint and configured with the
# label maps defined above.
# NOTE(review): `model` is later rebound to the result of torch.load(path), so
# these pretrained weights may never be used at inference time -- confirm.
segformer = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/mit-b1",
    id2label=id2label,
    label2id=label2id,
)
|
30 |
+
|
31 |
+
# SegModel
|
32 |
+
class SegModel(nn.Module):
    """SegFormer wrapper that upsamples the predicted logits by a factor of 4.

    The wrapped network is stored as ``self.segformer`` and the upsampling
    layer as ``self.upsample``; these attribute names are kept stable so that
    previously saved checkpoints of this class still load.
    """

    def __init__(self, segformer):
        """Store the wrapped model and create the nearest-neighbour upsampler.

        Args:
            segformer: Callable module whose output exposes a ``logits``
                attribute.
        """
        super().__init__()
        self.segformer = segformer
        # presumably undoes SegFormer's 1/4-resolution logits -- TODO confirm
        self.upsample = nn.Upsample(scale_factor=4, mode='nearest')

    def forward(self, x):
        """Run the wrapped model on ``x`` and return its upsampled logits."""
        outputs = self.segformer(x)
        return self.upsample(outputs.logits)
|
40 |
+
|
41 |
+
# Load the fine-tuned segmentation model.
#
# As written, this code relies on the checkpoint being a fully pickled module
# (the result is used directly and `.eval()` is called on it), not a
# state_dict. No fresh SegModel(segformer) is built here because it would be
# discarded by the load below; the SegModel class itself must still be defined
# in this module so the pickle can be resolved.
path = "runs/2023-08-31_rich-paper-12/best_model_cpu.pth"
print(f"Load Segformer weights from {path}")

# map_location pins every tensor to the CPU regardless of where the checkpoint
# was saved. weights_only=False is required to unpickle a whole nn.Module on
# PyTorch versions whose default is weights_only=True.
# SECURITY: this executes pickle code; only ever load checkpoints shipped with
# this repository, never user-supplied files.
model = torch.load(path, map_location=device, weights_only=False)
model.eval()  # inference mode (disables dropout etc.)
|
47 |
+
|
48 |
+
##################
|
49 |
+
# Image preprocess
|
50 |
+
##################
|
51 |
+
|
52 |
+
# Preprocessing steps used by process_pil, in the order they are applied.
to_tensor = transforms.ToTensor()        # PIL image -> tensor
resize = transforms.Resize((512, 512))   # fixed 512x512 model input size
# Per-channel mean/std; these are the commonly used ImageNet statistics.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
|
56 |
+
|
57 |
+
def process_pil(img):
    """Convert an input image into the tensor fed to the model.

    Applies the module-level ``to_tensor``, ``resize`` and ``normalize``
    transforms, in that order, and returns the result.
    """
    return normalize(resize(to_tensor(img)))
|
62 |
+
|
63 |
+
###########
|
64 |
+
# Inference
|
65 |
+
|
66 |
+
def inference(img, name=None):
    """Segment one image and render all class masks as a single grid image.

    Args:
        img: Input image; it is passed to ``process_pil`` for preprocessing.
        name: Unused. Kept for backward compatibility; it defaults to ``None``
            so the function can be called with the image alone, which is how
            a single-input ``gr.Interface`` invokes it.

    Returns:
        A PIL image of a 5x4 matplotlib grid. The first panel ("ALL") shows,
        per pixel, how many classes exceed the threshold; the remaining panels
        show the thresholded mask of each label in ``target_list``.
    """
    # The model expects a batch, hence the extra leading dimension.
    batch = process_pil(img).unsqueeze(0)

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        logits = model(batch)[0]  # assumes (classes, H, W) after indexing -- TODO confirm
        # Multi-label task: independent sigmoid per class.
        mask_probs = torch.sigmoid(logits).numpy()

    # Make binary masks.
    THRESHOLD = 0.5
    mask_preds = mask_probs > THRESHOLD

    # Per-pixel count of active classes, prepended as an extra "ALL" channel.
    mask_all = mask_preds.sum(axis=0, keepdims=True)
    mask_preds = np.concatenate((mask_all, mask_preds), axis=0)
    labs = ["ALL"] + target_list

    fig, axes = plt.subplots(5, 4, figsize=(10, 10))
    try:
        for ax, label, mask in zip(axes.flat, labs, mask_preds):
            ax.imshow(mask)
            ax.set_title(label)
        fig.tight_layout()

        # Render the figure into an in-memory PNG.
        img_buf = io.BytesIO()
        fig.savefig(img_buf, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # long-running server leaks one figure per request.
        plt.close(fig)

    img_buf.seek(0)
    return Image.open(img_buf)
|
104 |
+
|
105 |
+
|
106 |
+
|
107 |
+
title = "dacl-challenge @ WACV2024"
|
108 |
+
description = """
|
109 |
+
<b>
|
110 |
+
<p style="text-align:center">
|
111 |
+
<a href='https://twitter.com/dacl_ai' target='_blank'>Twitter</a><a href='https://x.com/dacl_ai' target='_blank'>/X</a> |
|
112 |
+
<a href='https://wacv2024.thecvf.com/workshops/' target='_blank'>WACV2024</a> |
|
113 |
+
<a href='https://arxiv.org/abs/2309.00460' target='_blank'>arXiv</a> |
|
114 |
+
<a href='https://github.com/phiyodr/dacl10k-toolkit' target='_blank'>Python Toolkit</a> |
|
115 |
+
<a href='https://try.fiftyone.ai/datasets/dacl10k/samples' target='_blank'>voxel51.com</a> |
|
116 |
+
<a href='https://eval.ai/web/challenges/challenge-page/2130/overview' target='_blank'>eval.ai</a> |
|
117 |
+
<a href='https://dacl.ai/workshop.html' target='_blank'>dacl.ai workshop page</a>
|
118 |
+
</p>
|
119 |
+
|
120 |
+
<p></p>
|
121 |
+
<p>π The challenge uses the dacl10k dataset, which stands for <i>damage classification 10k images</i> and is a <b>multi-label semantic segmentation dataset</b> for 19 classes (13 damages and 6 objects) present on bridges.</p>
|
122 |
+
<p></p>
|
123 |
+
|
124 |
+
<p>π The dataset is used in the <a href='https://eval.ai/web/challenges/challenge-page/2130/overview' target='_blank'>dacl-challenge</a> associated with the "<a href='https://dacl.ai/workshop' target='_blank'>1st Workshop on Vision-Based Structural Inspections in Civil Engineering</a>" at <a href='https://wacv2024.thecvf.com/workshops/' target='_blank'>WACV2024</a>.</p>
|
125 |
+
<p></p>
|
126 |
+
</b>
|
127 |
+
|
128 |
+
<p>Civil engineering structures such as power plants, sewers, and bridges form essential components of the public infrastructure. It is mandatory to keep these structures in a safe and operational state. In order to ensure this, they are frequently inspected where the current recognition and documentation of defects and building components is mostly carried out manually. A failure of individual structures results in enormous costs. For example, the economic costs caused by the closure of a bridge due to congestion is many times the cost of the bridge itself and its maintenance.</p>
|
129 |
+
<p></p>
|
130 |
+
|
131 |
+
<p>Recent advancements in hardware and software offer great potential for increasing the quality, traceability, and efficiency of the structural inspection process. In particular, methods from the field of computer vision play an important role. The new techniques support the inspection engineer at the building site, raising quality and efficiency of the inspection. There is a high demand worldwide for the automation of structural inspections in the areas of building construction, bridge construction, tunnel construction, sewage plants, and other critical infrastructures.</p>
|
132 |
+
<p></p>
|
133 |
+
|
134 |
+
<p>In the “<a href='https://dacl.ai/workshop' target='_blank'>1st Workshop on Vision-Based Structural Inspections in Civil Engineering</a>,” approaches utilizing computer vision for analyzing and assessing civil engineering structures will be explored. The workshop will provide a platform for experts from both the academic and application community. The core of the workshop is the “<a href='https://eval.ai/web/challenges/challenge-page/2130/overview' target='_blank'>dacl-challenge</a>,” which aims to find the best models for recognizing bridge defects and bridge components by means of semantic segmentation. The challenge is based on the “<b>dacl10k</b>” dataset, a novel, real-world, large-scale benchmark for multi-label semantic segmentation that distinguishes between <b>13 defect types</b> and <b>six building components</b>. The workshop will take place at the <a href='https://wacv2024.thecvf.com/workshops/' target='_blank'>IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) 2024</a>.</p>
|
135 |
+
<p></p>
|
136 |
+
|
137 |
+
<p>Details:
|
138 |
+
<ul>
|
139 |
+
<li>Model: <a href='https://huggingface.co/nvidia/mit-b1' target='_blank'>SegFormer mit-b1</a>, trained on resized 512x512 images for (only) 10 epochs.</li>
|
140 |
+
<li>Label description of dacl10k dataset: "A.3. Class descriptions" in <a href='https://arxiv.org/pdf/2309.00460.pdf' target='_blank'>J. Flotzinger, P.J. Rösch, T. Braml: "dacl10k: Benchmark for Semantic Bridge Damage Segmentation".</a></li>
|
141 |
+
</ul>
|
142 |
+
|
143 |
+
"""
|
144 |
+
|
145 |
+
# Footer HTML shown below the interface.
article = "<p style='text-align: center'><a href='https://github.com/phiyodr/dacl10k-toolkit' target='_blank'>Github Repo</a></p>"

# Clickable sample inputs. Each row holds one value per input component, and
# the interface has a single image input.
examples = [
    ['assets/dacl10k_v2_validation_0037.jpg'],
    ['assets/dacl10k_v2_validation_0068.jpg'],
    ['assets/dacl10k_v2_validation_0053.jpg'],
]

# gr.Image replaces the deprecated gr.inputs.Image / gr.outputs.Image
# namespaces, which were removed in Gradio 4.
demo = gr.Interface(
    fn=inference,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title=title,
    description=description,
    article=article,
    examples=examples,
)

demo.launch()
|
assets/dacl10k_v2_validation_0037.jpg
ADDED
assets/dacl10k_v2_validation_0053.jpg
ADDED
assets/dacl10k_v2_validation_0068.jpg
ADDED
runs/2023-08-31_rich-paper-12/best_model_cpu.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:166b778bdbc57ef2bfd5f71bc6ae724bf598d1a4c6a1b69afa4fbd936f2d965b
|
3 |
+
size 54833953
|