Spaces:

bgaspra
/

CNN_MLP

Sleeping

App Files Files Community

bgaspra committed on Nov 13, 2024

Commit

0365b37

verified ·

1 Parent(s): e897bc2

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -25

app.py CHANGED Viewed

@@ -8,61 +8,88 @@ import pandas as pd
 from datasets import load_dataset
 from torch.utils.data import DataLoader, Dataset
 from sklearn.preprocessing import LabelEncoder
 # Load dataset
 dataset = load_dataset('thefcraft/civitai-stable-diffusion-337k', split='train[:10000]')
 # Preprocess text data
 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
 class CustomDataset(Dataset):
-    def __init__(self, dataset):
         self.dataset = dataset
         self.transform = transforms.Compose([
             transforms.Resize((224, 224)),
             transforms.ToTensor(),
         ])
         self.label_encoder = LabelEncoder()
         self.labels = self.label_encoder.fit_transform(dataset['Model'])
     def __len__(self):
         return len(self.dataset)
     def __getitem__(self, idx):
-        image = self.transform(self.dataset[idx]['image'])
-        text = tokenizer(self.dataset[idx]['prompt'], padding='max_length', truncation=True, return_tensors='pt')
         label = self.labels[idx]
         return image, text, label
-# Define CNN for image processing
 class ImageModel(nn.Module):
     def __init__(self):
         super(ImageModel, self).__init__()
         self.model = models.resnet18(pretrained=True)
         self.model.fc = nn.Linear(self.model.fc.in_features, 512)
     def forward(self, x):
         return self.model(x)
-# Define MLP for text processing
 class TextModel(nn.Module):
     def __init__(self):
         super(TextModel, self).__init__()
         self.bert = BertModel.from_pretrained('bert-base-uncased')
         self.fc = nn.Linear(768, 512)
     def forward(self, x):
         output = self.bert(**x)
         return self.fc(output.pooler_output)
-# Combined model
 class CombinedModel(nn.Module):
     def __init__(self):
         super(CombinedModel, self).__init__()
         self.image_model = ImageModel()
         self.text_model = TextModel()
         self.fc = nn.Linear(1024, len(dataset['Model']))
     def forward(self, image, text):
         image_features = self.image_model(image)
         text_features = self.text_model(text)
@@ -72,24 +99,45 @@ class CombinedModel(nn.Module):
 # Instantiate model
 model = CombinedModel()
-# Define predict function
-def predict(image):
     model.eval()
     with torch.no_grad():
-        image = transforms.ToTensor()(image).unsqueeze(0)
-        image = transforms.Resize((224, 224))(image)
-        text_input = tokenizer("Sample prompt", return_tensors='pt', padding=True, truncation=True)
-        output = model(image, text_input)
-        _, indices = torch.topk(output, 5)
-        recommended_models = [dataset['Model'][i] for i in indices[0]]
-    return recommended_models
 # Set up Gradio interface
-interface = gr.Interface(fn=predict,
-                         inputs=gr.Image(type="pil"),
-                         outputs=gr.Textbox(label="Recommended Models"),
-                         title="AI Image Model Recommender",
-                         description="Upload an AI-generated image to receive model recommendations.")
 # Launch the app
 interface.launch()

 from datasets import load_dataset
 from torch.utils.data import DataLoader, Dataset
 from sklearn.preprocessing import LabelEncoder
+import requests
+from PIL import Image
+from io import BytesIO
+import numpy as np
 # Load dataset
 # Only the first 10,000 rows of the train split are requested (split='train[:10000]').
 dataset = load_dataset('thefcraft/civitai-stable-diffusion-337k', split='train[:10000]')
+# Download and cache images
+def download_image(url):
+    try:
+        response = requests.get(url)
+        img = Image.open(BytesIO(response.content))
+        return img
+    except:
+        return None
+# Create image cache
+image_cache = {}
+for idx, item in enumerate(dataset):
+    if idx % 100 == 0:  # Status update
+        print(f"Downloaded {idx} images")
+    url = item['url']
+    if url not in image_cache:
+        img = download_image(url)
+        if img is not None:
+            image_cache[url] = img
 # Preprocess text data
 # Shared module-level tokenizer; CustomDataset.__getitem__ and get_recommendations both use it.
 tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
 class CustomDataset(Dataset):
+    def __init__(self, dataset, image_cache):
         self.dataset = dataset
+        self.image_cache = image_cache
         self.transform = transforms.Compose([
             transforms.Resize((224, 224)),
             transforms.ToTensor(),
         ])
         self.label_encoder = LabelEncoder()
         self.labels = self.label_encoder.fit_transform(dataset['Model'])
     def __len__(self):
         return len(self.dataset)
     def __getitem__(self, idx):
+        url = self.dataset[idx]['url']
+        image = self.transform(self.image_cache[url])
+        text = tokenizer(self.dataset[idx]['prompt'],
+                        padding='max_length',
+                        truncation=True,
+                        return_tensors='pt')
         label = self.labels[idx]
         return image, text, label
+# Model definitions remain the same
 class ImageModel(nn.Module):
     """Image branch: a ResNet-18 whose final layer is replaced by a 512-feature projection."""
     def __init__(self):
         super(ImageModel, self).__init__()
         # ResNet-18 requested with pretrained=True (models is presumably torchvision.models; its import is outside this view).
         self.model = models.resnet18(pretrained=True)
         # Replace the stock final layer with a linear layer that outputs 512 features.
         self.model.fc = nn.Linear(self.model.fc.in_features, 512)
     def forward(self, x):
         """Pass ``x`` through the wrapped ResNet and return its output."""
         return self.model(x)
 class TextModel(nn.Module):
     """Text branch: 'bert-base-uncased' followed by a linear layer mapping 768 features to 512."""
     def __init__(self):
         super(TextModel, self).__init__()
         self.bert = BertModel.from_pretrained('bert-base-uncased')
         # Project BERT's pooled output (768 wide, per this layer's input size) down to 512.
         self.fc = nn.Linear(768, 512)
     def forward(self, x):
         """Unpack the mapping ``x`` as keyword arguments to BERT and project its pooled output."""
         output = self.bert(**x)
         return self.fc(output.pooler_output)
 class CombinedModel(nn.Module):
     def __init__(self):
         super(CombinedModel, self).__init__()
         self.image_model = ImageModel()
         self.text_model = TextModel()
         self.fc = nn.Linear(1024, len(dataset['Model']))
     def forward(self, image, text):
         image_features = self.image_model(image)
         text_features = self.text_model(text)
 # Instantiate model
 # NOTE(review): no checkpoint loading or training step is visible in this view; confirm the final layers are not left at their initial weights.
 model = CombinedModel()
+# Modified prediction function
+def get_recommendations(input_image):
     model.eval()
     with torch.no_grad():
+        # Process input image
+        transform = transforms.Compose([
+            transforms.Resize((224, 224)),
+            transforms.ToTensor()
+        ])
+        input_tensor = transform(input_image).unsqueeze(0)
+        # Get dummy text input (since we're focusing on image similarity)
+        text_input = tokenizer("", return_tensors='pt', padding=True, truncation=True)
+        # Get model output
+        output = model(input_tensor, text_input)
+        scores, indices = torch.topk(output, 5)
+        # Prepare gallery output
+        gallery_images = []
+        for idx in indices[0]:
+            url = dataset[idx]['url']
+            model_name = dataset[idx]['Model']
+            score = scores[0][idx].item()
+            # Get image from cache
+            if url in image_cache:
+                gallery_images.append((image_cache[url], f"{model_name}\nScore: {score:.2f}"))
+        return gallery_images
 # Set up Gradio interface
+# Wires get_recommendations to a PIL image input and a gallery output.
+# NOTE(review): the description mentions "distances", while the captions built in get_recommendations are labelled "Score"; confirm the intended wording.
+interface = gr.Interface(
+    fn=get_recommendations,
+    inputs=gr.Image(type="pil"),
+    outputs=gr.Gallery(label="Recommended Images"),
+    title="Image Recommendation System",
+    description="Upload an image and get similar images with their model names and distances."
+)
 # Launch the app
 # Runs at import time, after the module-level dataset load and image download loop above have finished.
 interface.launch()