abiabidali committed on
Commit
f899183
·
verified ·
1 Parent(s): 634fff1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -46
app.py CHANGED
@@ -5,88 +5,120 @@ from RealESRGAN import RealESRGAN
5
  import gradio as gr
6
  import numpy as np
7
  import tempfile
 
8
  import os
9
 
10
  # Set device to GPU if available, otherwise use CPU
11
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
12
 
13
# BLIP captioning processor and model, created once when the module is imported
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
model = model.to(device)

# One Real-ESRGAN upscaler per supported scale factor
model2 = RealESRGAN(device, scale=2)
model4 = RealESRGAN(device, scale=4)
model8 = RealESRGAN(device, scale=8)

# Weights are loaded up front so they are not reloaded during processing
model2.load_weights('weights/RealESRGAN_x2.pth', download=True)
model4.load_weights('weights/RealESRGAN_x4.pth', download=True)
model8.load_weights('weights/RealESRGAN_x8.pth', download=True)
26
 
27
# Upscale an image with the Real-ESRGAN model matching the requested scale
def enhance_image(image, scale):
    """Return ``image`` upscaled by the Real-ESRGAN model chosen by ``scale``.

    ``'2x'`` selects ``model2`` and ``'4x'`` selects ``model4``; every other
    value selects ``model8``. The input is converted to RGB and handed to the
    model as a numpy array; the prediction is cast to ``uint8`` and wrapped in
    a new PIL image.
    """
    pixels = np.array(image.convert('RGB'))

    # Pick the upscaler first so the prediction call is written only once.
    if scale == '2x':
        upscaler = model2
    elif scale == '4x':
        upscaler = model4
    else:
        upscaler = model8

    upscaled = upscaler.predict(pixels)
    return Image.fromarray(np.uint8(upscaled))
 
 
 
 
 
 
 
 
 
 
 
37
 
38
# Caption an image with the module-level BLIP processor and model
def generate_caption(image):
    """Return the BLIP-generated caption text for ``image``.

    The image is preprocessed into PyTorch tensors on ``device``, the model
    generates token ids, and the first generated sequence is decoded with
    special tokens removed.
    """
    encoded = processor(images=image, return_tensors="pt").to(device)
    generated = model.generate(**encoded)
    first_sequence = generated[0]
    return processor.decode(first_sequence, skip_special_tokens=True)
 
43
 
44
# Tag an image with the requested DPI
def muda_dpi(image, dpi):
    """Return a JPEG-encoded copy of ``image`` carrying ``dpi`` as resolution.

    The previous implementation saved to ``image.filename``. Images produced
    by ``Image.convert`` (which is what the caller passes) do not carry that
    attribute, and when it is present the source file would be overwritten in
    place. The image is now encoded into an in-memory buffer instead, so
    nothing on disk is touched.

    Args:
        image: PIL image to tag; it must be in a mode the JPEG writer accepts.
        dpi: Value used for both the horizontal and vertical resolution.

    Returns:
        The image re-opened from the in-memory JPEG encoding, fully loaded.
    """
    import io  # local import: only this function needs it

    buffer = io.BytesIO()
    image.save(buffer, format='JPEG', dpi=(dpi, dpi))
    buffer.seek(0)

    tagged = Image.open(buffer)
    # Image.open is lazy; decode now so the result does not depend on the
    # buffer staying readable.
    tagged.load()
    return tagged
 
 
 
 
48
 
49
# Resize an image to an explicit width and height
def resize_image(image, width, height):
    """Return ``image`` resized to ``width`` x ``height``.

    The two dimensions are passed to ``image.resize`` as a single
    ``(width, height)`` tuple; whatever that call returns is returned as is.
    """
    target_size = (width, height)
    return image.resize(target_size)
 
 
 
 
 
52
 
53
# Main entry point: run the selected steps on every uploaded image
def process_images(image_files, enhance, scale, adjust_dpi, dpi, resize, width, height):
    """Process each uploaded image, caption it and save it as a JPEG.

    Args:
        image_files: Uploaded images, each something ``Image.open`` accepts.
            ``None`` or an empty sequence yields three empty lists.
        enhance: When truthy, upscale with ``enhance_image`` using ``scale``.
        scale: Scale selector forwarded to ``enhance_image``.
        adjust_dpi: When truthy, tag the image with ``dpi``.
        dpi: Resolution used for both axes when ``adjust_dpi`` is set.
        resize: When truthy, resize to ``width`` x ``height``.
        width: Target width forwarded to ``resize_image``.
        height: Target height forwarded to ``resize_image``.

    Returns:
        A tuple ``(processed_images, file_paths, captions)`` of three lists,
        all in the order of ``image_files``.
    """
    processed_images = []
    file_paths = []
    captions = []

    if not image_files:
        return processed_images, file_paths, captions

    # The output names are fixed per position, so each call writes into its
    # own directory; otherwise later runs would overwrite earlier results in
    # the shared temp directory.
    output_dir = tempfile.mkdtemp()

    for index, image_file in enumerate(image_files, start=1):
        # The context manager releases the source file once it is decoded.
        with Image.open(image_file) as source:
            image = source.convert('RGB')

        if enhance:
            image = enhance_image(image, scale)

        if adjust_dpi:
            image = muda_dpi(image, dpi)

        if resize:
            image = resize_image(image, width, height)

        captions.append(generate_caption(image))

        final_path = os.path.join(output_dir, f"Image_Captioning_with_BLIP_{index}.jpg")
        # Pillow's JPEG writer takes the resolution from the ``dpi`` save
        # option, so it is repeated here to reach the downloadable file.
        save_options = {'dpi': (dpi, dpi)} if adjust_dpi else {}
        image.save(final_path, format='JPEG', **save_options)

        file_paths.append(final_path)
        processed_images.append(image)

    return processed_images, file_paths, captions
88
 
89
- # Setup Gradio interface
90
  iface = gr.Interface(
91
  fn=process_images,
92
  inputs=[
@@ -104,10 +136,11 @@ iface = gr.Interface(
104
  gr.Files(label="Download Final Images"),
105
  gr.Textbox(label="Image Captions")
106
  ],
107
- title="High-Quality Image Enhancer with Fast Processing",
108
  description="Upload multiple images (.jpg, .png), enhance using AI, adjust DPI, resize, generate captions, and download the final results."
109
  )
110
 
 
111
  iface.launch(debug=True)
112
 
113
 
 
5
  import gradio as gr
6
  import numpy as np
7
  import tempfile
8
+ import time
9
  import os
10
 
11
  # Set device to GPU if available, otherwise use CPU
12
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
13
 
14
# Build a Real-ESRGAN upscaler for one scale factor and load its weights
def load_model(scale):
    """Create a ``RealESRGAN`` model for ``scale`` and load its weights.

    Weights are read from ``weights/RealESRGAN_x{scale}.pth``. The first
    attempt passes ``download=True``. If that raises, the error is printed
    and the load is retried once with ``download=False``; an exception from
    the retry propagates to the caller.

    Args:
        scale: Scale factor handed to ``RealESRGAN`` and used in the weights
            file name.

    Returns:
        The model with its weights loaded.
    """
    upscaler = RealESRGAN(device, scale=scale)
    checkpoint = f'weights/RealESRGAN_x{scale}.pth'

    try:
        upscaler.load_weights(checkpoint, download=True)
    except Exception as e:
        print(f"Error loading weights for scale {scale}: {e}")
        # Second chance: use whatever is already at the checkpoint path.
        upscaler.load_weights(checkpoint, download=False)
    else:
        print(f"Weights for scale {scale} loaded successfully.")

    return upscaler
25
 
26
# One Real-ESRGAN upscaler per supported scale, created when the module loads
model2, model4, model8 = (load_model(factor) for factor in (2, 4, 8))

# BLIP processor and captioning model, moved to the selected device
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
model = model.to(device)
 
34
 
35
# Upscale an image with the Real-ESRGAN model matching the requested scale
def enhance_image(image, scale):
    """Return ``image`` upscaled by the model chosen by ``scale``.

    ``'2x'`` selects ``model2`` and ``'4x'`` selects ``model4``; every other
    value selects ``model8``. Progress and elapsed time are printed. If any
    step raises, the error is printed and the input ``image`` is returned
    unchanged.
    """
    try:
        print(f"Enhancing image with scale {scale}...")
        started = time.time()
        pixels = np.array(image.convert('RGB'))
        print(f"Image converted to numpy array: shape {pixels.shape}, dtype {pixels.dtype}")

        # Pick the upscaler first so the prediction call is written only once.
        if scale == '2x':
            upscaler = model2
        elif scale == '4x':
            upscaler = model4
        else:
            upscaler = model8
        upscaled = Image.fromarray(np.uint8(upscaler.predict(pixels)))

        print(f"Image enhanced in {time.time() - started:.2f} seconds")
        return upscaled
    except Exception as e:
        # Best effort: a failed enhancement must not abort the whole batch.
        print(f"Error enhancing image: {e}")
        return image
56
 
57
# Caption an image with the module-level BLIP processor and model
def generate_caption(image):
    """Return the BLIP-generated caption text for ``image``.

    The image is preprocessed into PyTorch tensors on ``device``, the model
    generates token ids, and the first generated sequence is decoded with
    special tokens removed.
    """
    encoded = processor(images=image, return_tensors="pt").to(device)
    generated = model.generate(**encoded)
    return processor.decode(generated[0], skip_special_tokens=True)
63
 
64
# Adjust the DPI of the image
def muda_dpi(input_image, dpi):
    """Return a JPEG-encoded copy of ``input_image`` tagged with ``dpi``.

    The previous implementation wrote the JPEG to a ``delete=False``
    temporary file that was never removed, leaving one stray file per call,
    and returned an image still lazily bound to that file. The encoding now
    happens in an in-memory buffer and the result is fully loaded before it
    is returned.

    Args:
        input_image: Array of RGB pixel data; it is cast to ``uint8`` before
            being wrapped in a PIL image.
        dpi: Value used for both the horizontal and vertical resolution.

    Returns:
        The image re-opened from the in-memory JPEG encoding.
    """
    import io  # local import: only this function needs it

    image = Image.fromarray(input_image.astype('uint8'), 'RGB')

    buffer = io.BytesIO()
    image.save(buffer, format='JPEG', dpi=(dpi, dpi))
    buffer.seek(0)

    tagged = Image.open(buffer)
    # Image.open is lazy; decode now so the result does not depend on the
    # buffer staying readable.
    tagged.load()
    return tagged
72
 
73
# Resize the image to the specified width and height
def resize_image(input_image, width, height):
    """Return ``input_image`` resized to ``width`` x ``height`` as a PIL image.

    The previous implementation saved the resized image to a ``delete=False``
    temporary JPEG that was never removed and then re-opened it. That left a
    stray file per call and added a lossy re-encode with no effect on the
    requested size. The resized image is now returned directly.

    Args:
        input_image: Array of RGB pixel data; it is cast to ``uint8`` before
            being wrapped in a PIL image.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        The resized PIL image.
    """
    image = Image.fromarray(input_image.astype('uint8'), 'RGB')
    # NOTE(review): the UI inputs are not visible here; the sizes are coerced
    # to int in case they arrive as floats.
    return image.resize((int(width), int(height)))
81
 
82
# Process the images: enhance, adjust DPI, resize, caption, and save
def process_images(image_files, enhance, scale, adjust_dpi, dpi, resize, width, height):
    """Process each uploaded image, caption it and save it as a JPEG.

    Args:
        image_files: Uploaded images, each something ``Image.open`` accepts.
            ``None`` or an empty sequence yields three empty lists.
        enhance: When truthy, upscale with ``enhance_image`` using ``scale``.
        scale: Scale selector forwarded to ``enhance_image``.
        adjust_dpi: When truthy, tag the image with ``dpi``.
        dpi: Resolution used for both axes when ``adjust_dpi`` is set.
        resize: When truthy, resize to ``width`` x ``height``.
        width: Target width forwarded to ``resize_image``.
        height: Target height forwarded to ``resize_image``.

    Returns:
        A tuple ``(processed_images, file_paths, captions)`` of three lists,
        all in the order of ``image_files``.
    """
    processed_images = []
    file_paths = []
    captions = []

    if not image_files:
        return processed_images, file_paths, captions

    # The output names are fixed per position, so each call writes into its
    # own directory; otherwise later runs would overwrite earlier results in
    # the shared temp directory.
    output_dir = tempfile.mkdtemp()

    for index, image_file in enumerate(image_files, start=1):
        # The context manager releases the source file once it is decoded.
        with Image.open(image_file) as source:
            original_image = source.convert('RGB')

        if enhance:
            original_image = enhance_image(original_image, scale)

        if adjust_dpi:
            original_image = muda_dpi(np.array(original_image), dpi)

        if resize:
            original_image = resize_image(np.array(original_image), width, height)

        # Generate a caption for the image
        captions.append(generate_caption(original_image))

        # Save the image under its custom filename
        final_path = os.path.join(output_dir, f"Image_Captioning_with_BLIP_{index}.jpg")
        # Pillow's JPEG writer takes the resolution from the ``dpi`` save
        # option, so it is repeated here to reach the downloadable file.
        save_options = {'dpi': (dpi, dpi)} if adjust_dpi else {}
        original_image.save(final_path, format='JPEG', **save_options)

        processed_images.append(original_image)
        file_paths.append(final_path)

    return processed_images, file_paths, captions
120
 
121
+ # Gradio interface setup
122
  iface = gr.Interface(
123
  fn=process_images,
124
  inputs=[
 
136
  gr.Files(label="Download Final Images"),
137
  gr.Textbox(label="Image Captions")
138
  ],
139
+ title="Multi-Image Enhancer with Captioning",
140
  description="Upload multiple images (.jpg, .png), enhance using AI, adjust DPI, resize, generate captions, and download the final results."
141
  )
142
 
143
+ # Launch the Gradio interface
144
  iface.launch(debug=True)
145
 
146