Spaces: Build error
Update app.py
app.py CHANGED
@@ -1,6 +1,7 @@
 import torch
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
+from RealESRGAN import RealESRGAN
 import gradio as gr
 import numpy as np
 import tempfile
@@ -9,33 +10,104 @@ import os
 # Set device to GPU if available, otherwise use CPU
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-# Load the BLIP model and processor
+# Load the BLIP model and processor once
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
 
+# Load the Real-ESRGAN models
+model2 = RealESRGAN(device, scale=2)
+model4 = RealESRGAN(device, scale=4)
+model8 = RealESRGAN(device, scale=8)
+
+# Load weights for all models at the start to avoid reloading during processing
+model2.load_weights('weights/RealESRGAN_x2.pth', download=True)
+model4.load_weights('weights/RealESRGAN_x4.pth', download=True)
+model8.load_weights('weights/RealESRGAN_x8.pth', download=True)
+
+# Function to enhance image resolution
+def enhance_image(image, scale):
+    image_np = np.array(image.convert('RGB'))
+    if scale == '2x':
+        result = model2.predict(image_np)
+    elif scale == '4x':
+        result = model4.predict(image_np)
+    else:
+        result = model8.predict(image_np)
+    return Image.fromarray(np.uint8(result))
+
 # Function to generate caption for the image using BLIP
 def generate_caption(image):
     inputs = processor(images=image, return_tensors="pt").to(device)
     output_ids = model.generate(**inputs)
     return processor.decode(output_ids[0], skip_special_tokens=True)
 
-# Function to
-def
+# Function to adjust DPI of the image
+def muda_dpi(image, dpi):
+    image.save(image.filename, format='JPEG', dpi=(dpi, dpi))
+    return image
+
+# Function to resize the image
+def resize_image(image, width, height):
+    return image.resize((width, height))
+
+# Main function to process images
+def process_images(image_files, enhance, scale, adjust_dpi, dpi, resize, width, height):
+    processed_images = []
+    file_paths = []
     captions = []
-
+
+    for i, image_file in enumerate(image_files):
         image = Image.open(image_file).convert('RGB')
+
+        # Enhance resolution if required
+        if enhance:
+            image = enhance_image(image, scale)
+
+        # Adjust DPI if required
+        if adjust_dpi:
+            image = muda_dpi(image, dpi)
+
+        # Resize if required
+        if resize:
+            image = resize_image(image, width, height)
+
+        # Generate caption
         caption = generate_caption(image)
         captions.append(caption)
-
+
+        # Save the processed image
+        custom_filename = f"Image_Captioning_with_BLIP_{i+1}.jpg"
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
+        image.save(temp_file.name, format='JPEG')
+        final_path = temp_file.name.replace(temp_file.name.split('/')[-1], custom_filename)
+        os.rename(temp_file.name, final_path)
+        file_paths.append(final_path)
+        processed_images.append(image)
+
+    return processed_images, file_paths, captions
 
 # Setup Gradio interface
 iface = gr.Interface(
     fn=process_images,
-    inputs=[
-
-
-
+    inputs=[
+        gr.Files(label="Upload Image Files"),
+        gr.Checkbox(label="Enhance Images (ESRGAN)"),
+        gr.Radio(['2x', '4x', '8x'], type="value", value='2x', label='Resolution model'),
+        gr.Checkbox(label="Adjust DPI"),
+        gr.Number(label="DPI", value=300),
+        gr.Checkbox(label="Resize"),
+        gr.Number(label="Width", value=512),
+        gr.Number(label="Height", value=512)
+    ],
+    outputs=[
+        gr.Gallery(label="Final Images"),
+        gr.Files(label="Download Final Images"),
+        gr.Textbox(label="Image Captions")
+    ],
+    title="High-Quality Image Enhancer with Fast Processing",
+    description="Upload multiple images (.jpg, .png), enhance using AI, adjust DPI, resize, generate captions, and download the final results."
 )
 
 iface.launch(debug=True)
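
Note on the build status: the RealESRGAN(device, scale=...) / load_weights(...) / predict(...) API used above matches the wrapper distributed from the ai-forever/Real-ESRGAN GitHub repository, not the realesrgan package on PyPI, whose entry point is different. Assuming that wrapper is the intended dependency, a requirements.txt for the Space might look like the following sketch (the exact pin list is an assumption, not taken from this repo):

# requirements.txt -- illustrative sketch only
torch
torchvision
transformers
gradio
numpy
Pillow
git+https://github.com/ai-forever/Real-ESRGAN.git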
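A caveat on muda_dpi: image.filename is set only on images that come straight from Image.open, so once the image has passed through convert('RGB'), enhance_image, or resize_image, the attribute is missing and the call raises AttributeError. Since JPEG DPI is metadata that Pillow writes at save time, one sketch of a fix keeps the function's name and signature but defers the DPI to the final save:

from PIL import Image

def muda_dpi(image, dpi):
    # Record the requested DPI on the image's metadata dict instead of
    # re-saving to image.filename, which transformed images do not have.
    image.info['dpi'] = (int(dpi), int(dpi))
    return image

# Later, in process_images, apply it when the JPEG is actually written:
# image.save(temp_file.name, format='JPEG', dpi=image.info.get('dpi', (72, 72)))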
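The final_path construction is also fragile: replacing the basename as a substring can corrupt the path if the same text occurs earlier in it, and splitting on '/' assumes a POSIX filesystem. A sketch of the same step with os.path, using a hypothetical helper (save_with_name is my name, not from this commit) that saves directly to the destination instead of saving and renaming:

import os
import tempfile

def save_with_name(image, custom_filename):
    # Build the destination path explicitly and save straight to it;
    # no NamedTemporaryFile, substring replace, or os.rename needed.
    final_path = os.path.join(tempfile.gettempdir(), custom_filename)
    image.save(final_path, format='JPEG')
    return final_path

Because the filename pattern is fixed, concurrent users of the Space would still overwrite each other's files; prefixing custom_filename with uuid.uuid4().hex would avoid that.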
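Finally, process_images returns the captions as a Python list into a single gr.Textbox, which is rendered as the list's string form. A small helper (name is mine) whose result is returned as the third value reads better, one caption per line:

def format_captions(captions):
    # One caption per line displays more cleanly in a single gr.Textbox
    # than the str() of a Python list.
    return "\n".join(captions)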