Spaces: Build error
Update app.py
app.py CHANGED
@@ -1,6 +1,7 @@
 import torch
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
+from RealESRGAN import RealESRGAN
 import gradio as gr
 import numpy as np
 import tempfile
@@ -9,33 +10,104 @@ import os
 # Set device to GPU if available, otherwise use CPU
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-# Load the BLIP model and processor
+# Load the BLIP model and processor once
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
 
+# Load the Real-ESRGAN models
+model2 = RealESRGAN(device, scale=2)
+model4 = RealESRGAN(device, scale=4)
+model8 = RealESRGAN(device, scale=8)
+
+# Load weights for all models at the start to avoid reloading during processing
+model2.load_weights('weights/RealESRGAN_x2.pth', download=True)
+model4.load_weights('weights/RealESRGAN_x4.pth', download=True)
+model8.load_weights('weights/RealESRGAN_x8.pth', download=True)
+
+# Function to enhance image resolution
+def enhance_image(image, scale):
+    image_np = np.array(image.convert('RGB'))
+    if scale == '2x':
+        result = model2.predict(image_np)
+    elif scale == '4x':
+        result = model4.predict(image_np)
+    else:
+        result = model8.predict(image_np)
+    return Image.fromarray(np.uint8(result))
+
 # Function to generate caption for the image using BLIP
 def generate_caption(image):
     inputs = processor(images=image, return_tensors="pt").to(device)
     output_ids = model.generate(**inputs)
     return processor.decode(output_ids[0], skip_special_tokens=True)
 
-# Function to
-def
+# Function to adjust DPI of the image
+def muda_dpi(image, dpi):
+    image.save(image.filename, format='JPEG', dpi=(dpi, dpi))
+    return image
+
+# Function to resize the image
+def resize_image(image, width, height):
+    return image.resize((width, height))
+
+# Main function to process images
+def process_images(image_files, enhance, scale, adjust_dpi, dpi, resize, width, height):
+    processed_images = []
+    file_paths = []
     captions = []
-
+
+    for i, image_file in enumerate(image_files):
         image = Image.open(image_file).convert('RGB')
+
+        # Enhance resolution if required
+        if enhance:
+            image = enhance_image(image, scale)
+
+        # Adjust DPI if required
+        if adjust_dpi:
+            image = muda_dpi(image, dpi)
+
+        # Resize if required
+        if resize:
+            image = resize_image(image, width, height)
+
+        # Generate caption
         caption = generate_caption(image)
         captions.append(caption)
-
+
+        # Save the processed image
+        custom_filename = f"Image_Captioning_with_BLIP_{i+1}.jpg"
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
+        image.save(temp_file.name, format='JPEG')
+        final_path = temp_file.name.replace(temp_file.name.split('/')[-1], custom_filename)
+        os.rename(temp_file.name, final_path)
+        file_paths.append(final_path)
+        processed_images.append(image)
+
+    return processed_images, file_paths, captions
 
 # Setup Gradio interface
 iface = gr.Interface(
     fn=process_images,
-    inputs=[
-
-
-
+    inputs=[
+        gr.Files(label="Upload Image Files"),
+        gr.Checkbox(label="Enhance Images (ESRGAN)"),
+        gr.Radio(['2x', '4x', '8x'], type="value", value='2x', label='Resolution model'),
+        gr.Checkbox(label="Adjust DPI"),
+        gr.Number(label="DPI", value=300),
+        gr.Checkbox(label="Resize"),
+        gr.Number(label="Width", value=512),
+        gr.Number(label="Height", value=512)
+    ],
+    outputs=[
+        gr.Gallery(label="Final Images"),
+        gr.Files(label="Download Final Images"),
+        gr.Textbox(label="Image Captions")
+    ],
+    title="High-Quality Image Enhancer with Fast Processing",
+    description="Upload multiple images (.jpg, .png), enhance using AI, adjust DPI, resize, generate captions, and download the final results."
 )
 
 iface.launch(debug=True)
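
Note on the build status: the RealESRGAN(device, scale=...) / load_weights(...) / predict(...) API used above matches the wrapper distributed from the ai-forever/Real-ESRGAN GitHub repository, not the realesrgan package on PyPI, whose entry point is different. Assuming that wrapper is the intended dependency, a requirements.txt for the Space might look like the following sketch (the exact pin list is an assumption, not taken from this repo):

# requirements.txt -- illustrative sketch only
torch
torchvision
transformers
gradio
numpy
Pillow
git+https://github.com/ai-forever/Real-ESRGAN.git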
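A caveat on muda_dpi: image.filename is set only on images that come straight from Image.open, so once the image has passed through convert('RGB'), enhance_image, or resize_image, the attribute is missing and the call raises AttributeError. Since JPEG DPI is metadata that Pillow writes at save time, one sketch of a fix keeps the function's name and signature but defers the DPI to the final save:

from PIL import Image

def muda_dpi(image, dpi):
    # Record the requested DPI on the image's metadata dict instead of
    # re-saving to image.filename, which transformed images do not have.
    image.info['dpi'] = (int(dpi), int(dpi))
    return image

# Later, in process_images, apply it when the JPEG is actually written:
# image.save(temp_file.name, format='JPEG', dpi=image.info.get('dpi', (72, 72)))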
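The final_path construction is also fragile: replacing the basename as a substring can corrupt the path if the same text occurs earlier in it, and splitting on '/' assumes a POSIX filesystem. A sketch of the same step with os.path, using a hypothetical helper (save_with_name is my name, not from this commit) that saves directly to the destination instead of saving and renaming:

import os
import tempfile

def save_with_name(image, custom_filename):
    # Build the destination path explicitly and save straight to it;
    # no NamedTemporaryFile, substring replace, or os.rename needed.
    final_path = os.path.join(tempfile.gettempdir(), custom_filename)
    image.save(final_path, format='JPEG')
    return final_path

Because the filename pattern is fixed, concurrent users of the Space would still overwrite each other's files; prefixing custom_filename with uuid.uuid4().hex would avoid that.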
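Finally, process_images returns the captions as a Python list into a single gr.Textbox, which is rendered as the list's string form. A small helper (name is mine) whose result is returned as the third value reads better, one caption per line:

def format_captions(captions):
    # One caption per line displays more cleanly in a single gr.Textbox
    # than the str() of a Python list.
    return "\n".join(captions)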