Upload 3 files

- app.py +100 -0
- specs_det.pth +3 -0
- unet.py +147 -0
app.py
ADDED
@@ -0,0 +1,100 @@
+import gradio as gr
+import torch
+import numpy as np
+import cv2
+from unet import UNet
+from torchvision import transforms
+from PIL import Image
+from cvzone.FaceDetectionModule import FaceDetector
+
+detector_face = FaceDetector()
+
+# 3 input channels (RGB), 1 output channel (binary glasses mask).
+model = UNet(3, 1)
+# map_location lets a GPU-saved checkpoint load on CPU-only hardware.
+model.load_state_dict(torch.load("specs_det.pth", map_location="cpu"))
+model.eval()
+
+
+def face_detect(full_image):
+    # PIL gives RGB; OpenCV and cvzone expect BGR.
+    open_cv_image = np.array(full_image)[:, :, ::-1].copy()
+
+    _, bboxs = detector_face.findFaces(open_cv_image)
+    if not bboxs:
+        # No face detected: fall back to segmenting the full frame.
+        return full_image
+
+    x, y, w, h = bboxs[0]['bbox']
+
+    # Crop the face with a 10 px margin, clamped to the image bounds.
+    cropped = open_cv_image[max(y - 10, 0):y + h + 10, max(x - 10, 0):x + w + 10]
+
+    # Convert back to an RGB PIL image for the torchvision transforms.
+    return Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
+
+
+def predict(image):
+    transform_input = transforms.Compose([
+        transforms.Resize((256, 256)),
+        transforms.ToTensor(),
+    ])
+    transform_output = transforms.Resize((256, 256))
+
+    image = face_detect(image)
+
+    with torch.no_grad():
+        image = transform_input(image).unsqueeze(0)
+        image = image.to(next(model.parameters()).device)
+
+        # The model outputs raw logits; sigmoid maps them to probabilities.
+        output = torch.sigmoid(model(image))
+        output = output.squeeze().cpu().numpy()
+
+        # Threshold at 0.5 to get a binary mask, then scale to 0/255.
+        output = (output > 0.5).astype(np.uint8)
+        output = Image.fromarray(output * 255)
+        output = transform_output(output)
+
+    return output
+
+
+# Create the Gradio app
+app = gr.Interface(
+    fn=predict,
+    inputs=gr.Image(label="Input Image", type="pil"),
+    outputs=gr.Image(label="Image with Segmentation", type="pil"),
+    title="Kamehamehaa",
+    description="Segment the eyeglasses region of a person's face",
+    examples=[
+        'face-synthetics-glasses/test/images/000368.jpg',
+        'face-synthetics-glasses/test/images/000411.jpg',
+    ],
+)
+
+# Run the app
+app.launch()
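For quick local testing outside Gradio, the same pipeline can be run as a script. A minimal sketch (it skips the face-cropping step, and the image path is an assumption borrowed from the examples list above):

import torch
import numpy as np
from PIL import Image
from torchvision import transforms
from unet import UNet

model = UNet(3, 1)
model.load_state_dict(torch.load("specs_det.pth", map_location="cpu"))
model.eval()

# Illustrative path, taken from the examples list in app.py.
img = Image.open("face-synthetics-glasses/test/images/000368.jpg").convert("RGB")
to_tensor = transforms.Compose([transforms.Resize((256, 256)), transforms.ToTensor()])

with torch.no_grad():
    probs = torch.sigmoid(model(to_tensor(img).unsqueeze(0)))
mask = (probs.squeeze().numpy() > 0.5).astype(np.uint8) * 255
Image.fromarray(mask).save("mask.png")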
specs_det.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4830119864b178eedce19a793f0ece1f451a8d7f05966d5068c1b33b80254aea
+size 124143163
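specs_det.pth is a Git LFS pointer; the actual checkpoint (roughly 118 MB per the size line) is fetched with git lfs pull after cloning. A quick sanity check that the fetched weights line up with the UNet definition, as a sketch:

import torch
from unet import UNet

state = torch.load("specs_det.pth", map_location="cpu")
# Both lists print empty if every checkpoint key matches the model.
missing, unexpected = UNet(3, 1).load_state_dict(state, strict=False)
print(missing, unexpected)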
unet.py
ADDED
@@ -0,0 +1,147 @@
+import os
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torchvision import transforms
+from torch.utils.data import DataLoader, Dataset
+from PIL import Image
+
+
+def double_convolution(in_channels, out_channels):
+    # Two 3x3 convolutions with ReLU, the basic UNet building block.
+    return nn.Sequential(
+        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
+        nn.ReLU(inplace=True),
+        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
+        nn.ReLU(inplace=True),
+    )
+
+
+class UNet(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(UNet, self).__init__()
+        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)
+
+        # Encoder: five double-conv stages, doubling the channels each time.
+        self.down_convolution_1 = double_convolution(in_channels, 64)
+        self.down_convolution_2 = double_convolution(64, 128)
+        self.down_convolution_3 = double_convolution(128, 256)
+        self.down_convolution_4 = double_convolution(256, 512)
+        self.down_convolution_5 = double_convolution(512, 1024)
+
+        # Decoder: transposed convolutions upsample, then double convs fuse
+        # the upsampled features with the matching encoder skip connection.
+        self.up_transpose_1 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
+        self.up_convolution_1 = double_convolution(1024, 512)
+        self.up_transpose_2 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
+        self.up_convolution_2 = double_convolution(512, 256)
+        self.up_transpose_3 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
+        self.up_convolution_3 = double_convolution(256, 128)
+        self.up_transpose_4 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
+        self.up_convolution_4 = double_convolution(128, 64)
+
+        # 1x1 convolution maps the final 64 channels to the output mask channels.
+        self.out = nn.Conv2d(64, out_channels, kernel_size=1)
+
+    def forward(self, x):
+        down_1 = self.down_convolution_1(x)
+        down_2 = self.max_pool2d(down_1)
+        down_3 = self.down_convolution_2(down_2)
+        down_4 = self.max_pool2d(down_3)
+        down_5 = self.down_convolution_3(down_4)
+        down_6 = self.max_pool2d(down_5)
+        down_7 = self.down_convolution_4(down_6)
+        down_8 = self.max_pool2d(down_7)
+        down_9 = self.down_convolution_5(down_8)
+
+        # Decoder with skip connections concatenated on the channel axis.
+        up_1 = self.up_transpose_1(down_9)
+        x = self.up_convolution_1(torch.cat([down_7, up_1], 1))
+        up_2 = self.up_transpose_2(x)
+        x = self.up_convolution_2(torch.cat([down_5, up_2], 1))
+        up_3 = self.up_transpose_3(x)
+        x = self.up_convolution_3(torch.cat([down_3, up_3], 1))
+        up_4 = self.up_transpose_4(x)
+        x = self.up_convolution_4(torch.cat([down_1, up_4], 1))
+        return self.out(x)
+
+
+class CustomDataset(Dataset):
+    def __init__(self, image_dir, mask_dir, transform=None):
+        self.image_dir = image_dir
+        self.mask_dir = mask_dir
+        self.transform = transform
+        # Sort both listings so images and masks pair up by index;
+        # os.listdir alone returns files in arbitrary order.
+        self.image_filenames = sorted(os.listdir(image_dir))
+        self.mask_filenames = sorted(os.listdir(mask_dir))
+
+    def __len__(self):
+        return len(self.image_filenames)
+
+    def __getitem__(self, idx):
+        image_path = os.path.join(self.image_dir, self.image_filenames[idx])
+        mask_path = os.path.join(self.mask_dir, self.mask_filenames[idx])
+
+        image = Image.open(image_path).convert("RGB")
+        mask = Image.open(mask_path).convert("L")
+
+        if self.transform:
+            image = self.transform(image)
+            mask = self.transform(mask)
+
+        return image, mask
+
+
+def train_model(model, dataloader, criterion, optimizer, num_epochs=25):
+    for epoch in range(num_epochs):
+        model.train()
+        running_loss = 0.0
+        for images, masks in dataloader:
+            optimizer.zero_grad()
+            outputs = model(images)
+            loss = criterion(outputs, masks)
+            loss.backward()
+            optimizer.step()
+            running_loss += loss.item()
+
+        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader):.4f}')
+
+
+if __name__ == "__main__":
+    transform = transforms.Compose([
+        transforms.Resize((256, 256)),
+        transforms.ToTensor(),
+    ])
+
+    image_dir = "face-synthetics-glasses/train/images"
+    mask_dir = "face-synthetics-glasses/train/masks"
+
+    dataset = CustomDataset(image_dir, mask_dir, transform=transform)
+    dataloader = DataLoader(dataset, batch_size=2, shuffle=True)
+
+    # BCEWithLogitsLoss applies sigmoid internally, so UNet outputs raw logits.
+    model = UNet(3, 1)
+    criterion = nn.BCEWithLogitsLoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+    # train_model(model, dataloader, criterion, optimizer, num_epochs=25)
+    # torch.save(model.state_dict(), "base_bat_ball.pth")
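Because the decoder concatenates each upsampled map with its encoder counterpart, input height and width should be multiples of 16 (four 2x pooling stages). A minimal shape check at the 256x256 resolution both scripts use, as a sketch:

import torch
from unet import UNet

net = UNet(3, 1)
with torch.no_grad():
    y = net(torch.randn(1, 3, 256, 256))
print(y.shape)  # torch.Size([1, 1, 256, 256]); raw logits, sigmoid is applied downstream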