import torch
import torch.nn as nn
from torchvision import transforms
from transformers import CLIPModel
from PIL import Image
import gradio as gr

# Define class labels
class_labels = ["Trash", "Compostable", "Recyclable"]

# Define the CLIP classifier (same architecture as used during training)
class CLIPClassifier(nn.Module):
    def __init__(self, clip_model, num_classes):
        super(CLIPClassifier, self).__init__()
        self.clip = clip_model.vision_model
        # ViT-B/32's vision encoder produces 768-dimensional pooled features
        self.fc = nn.Linear(768, num_classes)

    def forward(self, images):
        image_features = self.clip(images).pooler_output
        return self.fc(image_features)

# Load the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
model = CLIPClassifier(clip_model, num_classes=3).to(device)

# Load the saved weights
model.load_state_dict(torch.load("clip_trash_classifier_finetuned.pth", map_location=device))
model.eval()

# Preprocessing pipeline
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],   # CLIP's mean
                         std=[0.26862954, 0.26130258, 0.27577711])   # CLIP's std
])

# Prediction function
def predict(image: Image.Image):
    """
    Predict the class label and confidence for the uploaded image.
    Returns the label and confidence score as separate values.
    """
    # Ensure a 3-channel image (uploads may be RGBA or grayscale)
    image = image.convert("RGB")

    # Preprocess the image
    image = transform(image).unsqueeze(0).to(device)

    # Perform inference
    with torch.no_grad():
        outputs = model(image)
        probabilities = torch.nn.functional.softmax(outputs[0], dim=0)
        confidence, predicted = torch.max(probabilities, dim=0)

    # Get the predicted class and confidence score
    predicted_class = class_labels[predicted.item()]
    confidence_score = f"{confidence.item() * 100:.2f}%"

    # Return as separate outputs
    return predicted_class, confidence_score

# Gradio interface
interface = gr.Interface(
    fn=predict,                      # Prediction function
    inputs=gr.Image(type="pil"),     # Input: image as a PIL object
    outputs=[
        gr.Textbox(label="Predicted Category"),  # Output 1: predicted label
        gr.Textbox(label="Confidence")           # Output 2: confidence score
    ],
    title="Trash Classifier Using CLIP",
    description="Upload an image to classify it as **Trash**, **Compostable**, or **Recyclable**.\n"
                "The app will display the predicted category and confidence score."
)

# Launch the app
if __name__ == "__main__":
    interface.launch(share=True)
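
# --- Optional smoke test (a minimal sketch, not part of the app) ---
# Assumes a local image file named "sample.jpg"; the filename is hypothetical.
# Running this in a REPL verifies that the checkpoint loads and inference
# works without launching the Gradio UI:
#
#     from PIL import Image
#     label, confidence = predict(Image.open("sample.jpg"))
#     print(f"{label} ({confidence})")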