remotewith committed on
Commit ce4c34e · verified · 1 Parent(s): 2d10a26

Upload 3 files

Files changed (3)
  1. app.py +100 -0
  2. specs_det.pth +3 -0
  3. unet.py +147 -0
app.py ADDED
@@ -0,0 +1,100 @@
+ import gradio as gr
+ import torch
+ import numpy as np
+ from unet import UNet
+ from torchvision import transforms
+ from PIL import Image
+ from cvzone.FaceDetectionModule import FaceDetector
+ import cv2
+
+ detector_face = FaceDetector()
+
+ model = UNet(3, 1)
+ model.load_state_dict(torch.load("specs_det.pth"))
+ model.eval()
+
+
+ def face_detect(full_image):
+     # Convert the PIL image to an OpenCV BGR array.
+     open_cv_image = np.array(full_image)
+     open_cv_image = open_cv_image[:, :, ::-1].copy()
+
+     # draw=False keeps the detector from painting the bounding box
+     # onto the image we are about to crop.
+     _, bboxs = detector_face.findFaces(open_cv_image, draw=False)
+
+     if not bboxs:
+         # No face detected: fall back to the full frame.
+         return full_image
+
+     x, y, w, h = bboxs[0]['bbox']
+
+     # Crop with a 10-pixel margin, clamped at 0 so the slice cannot
+     # go negative and wrap around for faces near the image border.
+     y0, x0 = max(y - 10, 0), max(x - 10, 0)
+     cropped_image = open_cv_image[y0:y + h + 10, x0:x + w + 10]
+
+     img = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)
+     return Image.fromarray(img)
+
+
+ def predict(image):
+     transform_input = transforms.Compose([
+         transforms.Resize((256, 256)),
+         transforms.ToTensor(),
+     ])
+     transform_output = transforms.Compose([
+         transforms.Resize((256, 256)),
+     ])
+
+     image = face_detect(image)
+
+     with torch.no_grad():
+         image = transform_input(image).unsqueeze(0)
+         image = image.to(next(model.parameters()).device)
+
+         output = model(image)
+         output = torch.sigmoid(output)
+         output = output.squeeze().cpu().numpy()
+         # Threshold the probabilities into a binary 0/255 mask.
+         output = (output > 0.5).astype(np.uint8)
+         output = Image.fromarray(output * 255)
+         output = transform_output(output)
+
+     return output
+
+
+ # Create the Gradio app
+ app = gr.Interface(
+     fn=predict,
+     inputs=gr.Image(label="Input Image", type="pil"),
+     outputs=gr.Image(label="Image with Segmentation", type="pil"),
+     title="Kamehamehaa",
+     description="Segment the glasses region of a person's face",
+     examples=[
+         'face-synthetics-glasses/test/images/000368.jpg',
+         'face-synthetics-glasses/test/images/000411.jpg'
+     ]
+ )
+
+ # Run the app
+ app.launch()
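
For reference, the same inference path can be exercised outside Gradio. This is a minimal sketch, assuming specs_det.pth and unet.py are in the working directory; the input path some_face.jpg is a placeholder, and map_location="cpu" is added so a CUDA-saved checkpoint still loads on a CPU-only host:

import torch
from PIL import Image
from torchvision import transforms
from unet import UNet

model = UNet(3, 1)
model.load_state_dict(torch.load("specs_det.pth", map_location="cpu"))
model.eval()

to_tensor = transforms.Compose([transforms.Resize((256, 256)), transforms.ToTensor()])
x = to_tensor(Image.open("some_face.jpg").convert("RGB")).unsqueeze(0)  # placeholder path

with torch.no_grad():
    # Sigmoid turns the single-channel logits into probabilities;
    # thresholding at 0.5 yields a 256x256 boolean glasses mask.
    mask = (torch.sigmoid(model(x)) > 0.5).squeeze().numpy()
print(mask.shape)  # (256, 256)
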
specs_det.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4830119864b178eedce19a793f0ece1f451a8d7f05966d5068c1b33b80254aea
+ size 124143163
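
The weights file itself lives in Git LFS; the committed pointer records only the object's sha256 and size (about 124 MB), so a checkout needs git-lfs for the .pth to resolve. After download, the file can be verified against the pointer, as in this minimal sketch:

import hashlib

sha = hashlib.sha256()
with open("specs_det.pth", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)
print(sha.hexdigest() == "4830119864b178eedce19a793f0ece1f451a8d7f05966d5068c1b33b80254aea")
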
unet.py ADDED
@@ -0,0 +1,147 @@
+ import os
+ import torch
+ import torch.nn as nn
+ import torch.optim as optim
+ from torchvision import transforms
+ from torch.utils.data import DataLoader, Dataset
+ from PIL import Image
+
+
+ def double_convolution(in_channels, out_channels):
+     # Two 3x3 convolutions with ReLU; padding=1 keeps the spatial size.
+     conv_op = nn.Sequential(
+         nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
+         nn.ReLU(inplace=True),
+         nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
+         nn.ReLU(inplace=True)
+     )
+     return conv_op
+
+
+ class UNet(nn.Module):
+     def __init__(self, in_channels, out_channels):
+         super(UNet, self).__init__()
+         self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
+
+         # Encoder: 64 -> 128 -> 256 -> 512 -> 1024 channels.
+         self.down_convolution_1 = double_convolution(in_channels, 64)
+         self.down_convolution_2 = double_convolution(64, 128)
+         self.down_convolution_3 = double_convolution(128, 256)
+         self.down_convolution_4 = double_convolution(256, 512)
+         self.down_convolution_5 = double_convolution(512, 1024)
+
+         # Decoder: each transposed convolution upsamples by 2, then a
+         # double convolution fuses the concatenated skip connection.
+         self.up_transpose_1 = nn.ConvTranspose2d(
+             in_channels=1024, out_channels=512,
+             kernel_size=2, stride=2)
+         self.up_convolution_1 = double_convolution(1024, 512)
+         self.up_transpose_2 = nn.ConvTranspose2d(
+             in_channels=512, out_channels=256,
+             kernel_size=2, stride=2)
+         self.up_convolution_2 = double_convolution(512, 256)
+         self.up_transpose_3 = nn.ConvTranspose2d(
+             in_channels=256, out_channels=128,
+             kernel_size=2, stride=2)
+         self.up_convolution_3 = double_convolution(256, 128)
+         self.up_transpose_4 = nn.ConvTranspose2d(
+             in_channels=128, out_channels=64,
+             kernel_size=2, stride=2)
+         self.up_convolution_4 = double_convolution(128, 64)
+
+         # 1x1 convolution maps the 64 feature channels to the mask logits.
+         self.out = nn.Conv2d(
+             in_channels=64, out_channels=out_channels,
+             kernel_size=1
+         )
+
+     def forward(self, x):
+         down_1 = self.down_convolution_1(x)
+         down_2 = self.max_pool2d(down_1)
+         down_3 = self.down_convolution_2(down_2)
+         down_4 = self.max_pool2d(down_3)
+         down_5 = self.down_convolution_3(down_4)
+         down_6 = self.max_pool2d(down_5)
+         down_7 = self.down_convolution_4(down_6)
+         down_8 = self.max_pool2d(down_7)
+         down_9 = self.down_convolution_5(down_8)
+
+         up_1 = self.up_transpose_1(down_9)
+         x = self.up_convolution_1(torch.cat([down_7, up_1], 1))
+         up_2 = self.up_transpose_2(x)
+         x = self.up_convolution_2(torch.cat([down_5, up_2], 1))
+         up_3 = self.up_transpose_3(x)
+         x = self.up_convolution_3(torch.cat([down_3, up_3], 1))
+         up_4 = self.up_transpose_4(x)
+         x = self.up_convolution_4(torch.cat([down_1, up_4], 1))
+         out = self.out(x)
+         return out
+
+
+ class CustomDataset(Dataset):
+     def __init__(self, image_dir, mask_dir, transform=None):
+         self.image_dir = image_dir
+         self.mask_dir = mask_dir
+         self.transform = transform
+         # Sort both listings so images and masks pair up by filename;
+         # os.listdir returns entries in arbitrary order.
+         self.image_filenames = sorted(os.listdir(image_dir))
+         self.mask_filenames = sorted(os.listdir(mask_dir))
+
+     def __len__(self):
+         return len(self.image_filenames)
+
+     def __getitem__(self, idx):
+         image_path = os.path.join(self.image_dir, self.image_filenames[idx])
+         mask_path = os.path.join(self.mask_dir, self.mask_filenames[idx])
+
+         image = Image.open(image_path).convert("RGB")
+         mask = Image.open(mask_path).convert("L")
+
+         if self.transform:
+             image = self.transform(image)
+             mask = self.transform(mask)
+
+         return image, mask
+
+
+ def train_model(model, dataloader, criterion, optimizer, num_epochs=25):
+     for epoch in range(num_epochs):
+         model.train()
+         running_loss = 0.0
+         for images, masks in dataloader:
+             optimizer.zero_grad()
+             outputs = model(images)
+             loss = criterion(outputs, masks)
+             loss.backward()
+             optimizer.step()
+             running_loss += loss.item()
+
+         print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader):.4f}')
+
+
+ if __name__ == "__main__":
+     transform = transforms.Compose([
+         transforms.Resize((256, 256)),
+         transforms.ToTensor(),
+     ])
+
+     image_dir = "face-synthetics-glasses/train/images"
+     mask_dir = "face-synthetics-glasses/train/masks"
+
+     dataset = CustomDataset(image_dir, mask_dir, transform=transform)
+     dataloader = DataLoader(dataset, batch_size=2, shuffle=True)
+
+     model = UNet(3, 1)
+     criterion = nn.BCEWithLogitsLoss()
+     optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+     # train_model(model, dataloader, criterion, optimizer, num_epochs=25)
+     # torch.save(model.state_dict(), "base_bat_ball.pth")
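
A quick shape check on the network above: four 2x2 max-pools take a 256x256 input down to 16x16 at the bottleneck, and the four stride-2 transposed convolutions bring it back, so the output logits match the input resolution (input sides must be divisible by 16 for the skip concatenations to line up). A minimal sketch, not part of the committed files:

import torch
from unet import UNet

net = UNet(3, 1)
with torch.no_grad():
    y = net(torch.randn(1, 3, 256, 256))  # one RGB image
print(y.shape)  # torch.Size([1, 1, 256, 256]): one logit channel per pixel
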