pjohn2409 committed
Commit 48ce321 · 1 Parent(s): 1b8115b

Initial commit

app.py ADDED
@@ -0,0 +1,76 @@
+ import torch
+ import torch.nn as nn
+ from torchvision import transforms
+ from transformers import CLIPModel
+ from PIL import Image
+ import gradio as gr
+
+ # Define class labels
+ class_labels = ["Trash", "Compostable", "Recyclable"]
+
+ # Define CLIP Classifier (same as used during training)
+ class CLIPClassifier(nn.Module):
+     def __init__(self, clip_model, num_classes):
+         super(CLIPClassifier, self).__init__()
+         self.clip = clip_model.vision_model
+         self.fc = nn.Linear(768, num_classes)
+
+     def forward(self, images):
+         image_features = self.clip(images).pooler_output
+         return self.fc(image_features)
+
+ # Load the model
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+ model = CLIPClassifier(clip_model, num_classes=3).to(device)
+
+ # Load the saved weights
+ model.load_state_dict(torch.load("clip_trash_classifier_finetuned.pth", map_location=device))
+ model.eval()
+
+ # Preprocessing pipeline
+ transform = transforms.Compose([
+     transforms.Resize((224, 224)),
+     transforms.ToTensor(),
+     transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],   # CLIP's mean
+                          std=[0.26862954, 0.26130258, 0.27577711])   # CLIP's std
+ ])
+
+ # Prediction function
+ def predict(image):
+     """
+     Predict the class label and confidence of the uploaded image.
+     Returns separate values for label and confidence.
+     """
+     # Preprocess the image
+     image = transform(image).unsqueeze(0).to(device)
+
+     # Perform inference
+     with torch.no_grad():
+         outputs = model(image)
+         probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
+         confidence, predicted = torch.max(probabilities, dim=0)
+
+     # Get predicted class and confidence score
+     predicted_class = class_labels[predicted.item()]
+     confidence_score = f"{confidence.item() * 100:.2f}%"
+
+     # Return as separate outputs
+     return predicted_class, confidence_score
+
+ # Gradio Interface
+ interface = gr.Interface(
+     fn=predict,                                   # Prediction function
+     inputs=gr.Image(type="pil"),                  # Input: Image in PIL format
+     outputs=[
+         gr.Textbox(label="Predicted Category"),   # Output 1: Predicted Label
+         gr.Textbox(label="Confidence")            # Output 2: Confidence Score
+     ],
+     title="Trash Classifier Using CLIP",
+     description="Upload an image to classify it as **Trash**, **Compostable**, or **Recyclable**.\n"
+                 "The app will display the predicted category and confidence score."
+ )
+
+ # Launch the app
+ if __name__ == "__main__":
+     interface.launch(share=True)
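
For a quick check outside the Gradio UI, the predict function above can be exercised directly against the bundled example images. This is a minimal sketch, assuming app.py, the checkpoint, and the examples/ folder all sit in the working directory:

# Minimal local smoke test of predict() from app.py (sketch; file paths are assumptions).
# Importing app loads the model and builds the interface but does not launch Gradio,
# because of the __main__ guard at the bottom of app.py.
from PIL import Image
from app import predict

for path in ["examples/trash.jpg", "examples/recyclables.jpg", "examples/compostables.jpg"]:
    label, confidence = predict(Image.open(path).convert("RGB"))
    print(f"{path}: {label} ({confidence})")
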
clip_trash_classifier_finetuned.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3091e4fd097fbca183acb6aa0a21115fd359d1c40b15601adde1a9ab2c2072d0
+ size 349913432
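
The checkpoint is stored via Git LFS, so a clone without LFS support will only contain the three-line pointer above. A small sketch, using the size and sha256 recorded in that pointer, to confirm the real ~350 MB weights were fetched before launching the app:

# Verify clip_trash_classifier_finetuned.pth is the real checkpoint, not an
# unfetched Git LFS pointer (size and sha256 are taken from the pointer above).
import hashlib
import os

CKPT = "clip_trash_classifier_finetuned.pth"
EXPECTED_SIZE = 349913432
EXPECTED_SHA256 = "3091e4fd097fbca183acb6aa0a21115fd359d1c40b15601adde1a9ab2c2072d0"

assert os.path.getsize(CKPT) == EXPECTED_SIZE, "unexpected size; try `git lfs pull`"

sha = hashlib.sha256()
with open(CKPT, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_SHA256, "checksum mismatch"
print("checkpoint OK")
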
examples/compostables.jpg ADDED
examples/recyclables.jpg ADDED
examples/trash.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch==2.0.1
+ torchvision==0.15.2
+ transformers==4.31.0
+ gradio==3.50.2
+ Pillow==10.0.0
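
The pins above match the API calls used in app.py (gr.Image(type="pil"), gr.Textbox). A small sketch to confirm the installed environment matches the pinned versions before launching:

# Compare installed package versions against the pins in requirements.txt.
from importlib.metadata import version

pins = {"torch": "2.0.1", "torchvision": "0.15.2", "transformers": "4.31.0",
        "gradio": "3.50.2", "Pillow": "10.0.0"}
for pkg, pinned in pins.items():
    installed = version(pkg)
    status = "OK" if installed == pinned else f"MISMATCH (installed {installed})"
    print(f"{pkg}=={pinned}: {status}")
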