abiabidali committed on
Commit 634fff1 · verified · 1 Parent(s): fc24e3f

Update app.py

Files changed (1):
  app.py +81 -9

app.py CHANGED
@@ -1,6 +1,7 @@
 import torch
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
+from RealESRGAN import RealESRGAN
 import gradio as gr
 import numpy as np
 import tempfile
@@ -9,33 +10,104 @@ import os
 # Set device to GPU if available, otherwise use CPU
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-# Load the BLIP model and processor
+# Load the BLIP model and processor once
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
 
+# Load the Real-ESRGAN models
+model2 = RealESRGAN(device, scale=2)
+model4 = RealESRGAN(device, scale=4)
+model8 = RealESRGAN(device, scale=8)
+
+# Load weights for all models at the start to avoid reloading during processing
+model2.load_weights('weights/RealESRGAN_x2.pth', download=True)
+model4.load_weights('weights/RealESRGAN_x4.pth', download=True)
+model8.load_weights('weights/RealESRGAN_x8.pth', download=True)
+
+# Function to enhance image resolution
+def enhance_image(image, scale):
+    image_np = np.array(image.convert('RGB'))
+    if scale == '2x':
+        result = model2.predict(image_np)
+    elif scale == '4x':
+        result = model4.predict(image_np)
+    else:
+        result = model8.predict(image_np)
+    return Image.fromarray(np.uint8(result))
+
 # Function to generate caption for the image using BLIP
 def generate_caption(image):
     inputs = processor(images=image, return_tensors="pt").to(device)
     output_ids = model.generate(**inputs)
     return processor.decode(output_ids[0], skip_special_tokens=True)
 
-# Function to process images and generate captions
-def process_images(image_files):
+# Function to adjust DPI of the image
+def muda_dpi(image, dpi):
+    image.save(image.filename, format='JPEG', dpi=(dpi, dpi))
+    return image
+
+# Function to resize the image
+def resize_image(image, width, height):
+    return image.resize((width, height))
+
+# Main function to process images
+def process_images(image_files, enhance, scale, adjust_dpi, dpi, resize, width, height):
+    processed_images = []
+    file_paths = []
     captions = []
-    for image_file in image_files:
+
+    for i, image_file in enumerate(image_files):
         image = Image.open(image_file).convert('RGB')
+
+        # Enhance resolution if required
+        if enhance:
+            image = enhance_image(image, scale)
+
+        # Adjust DPI if required
+        if adjust_dpi:
+            image = muda_dpi(image, dpi)
+
+        # Resize if required
+        if resize:
+            image = resize_image(image, width, height)
+
+        # Generate caption
         caption = generate_caption(image)
         captions.append(caption)
-    return captions
+
+        # Save the processed image
+        custom_filename = f"Image_Captioning_with_BLIP_{i+1}.jpg"
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
+        image.save(temp_file.name, format='JPEG')
+        final_path = temp_file.name.replace(temp_file.name.split('/')[-1], custom_filename)
+        os.rename(temp_file.name, final_path)
+        file_paths.append(final_path)
+        processed_images.append(image)
+
+    return processed_images, file_paths, captions
 
 # Setup Gradio interface
 iface = gr.Interface(
     fn=process_images,
-    inputs=[gr.Files(label="Upload Image Files")],
-    outputs=[gr.Textbox(label="Image Captions")],
-    title="Image Captioning with BLIP",
-    description="Upload images and generate captions using the BLIP model from Hugging Face."
+    inputs=[
+        gr.Files(label="Upload Image Files"),
+        gr.Checkbox(label="Enhance Images (ESRGAN)"),
+        gr.Radio(['2x', '4x', '8x'], type="value", value='2x', label='Resolution model'),
+        gr.Checkbox(label="Adjust DPI"),
+        gr.Number(label="DPI", value=300),
+        gr.Checkbox(label="Resize"),
+        gr.Number(label="Width", value=512),
+        gr.Number(label="Height", value=512)
+    ],
+    outputs=[
+        gr.Gallery(label="Final Images"),
+        gr.Files(label="Download Final Images"),
+        gr.Textbox(label="Image Captions")
+    ],
+    title="High-Quality Image Enhancer with Fast Processing",
+    description="Upload multiple images (.jpg, .png), enhance using AI, adjust DPI, resize, generate captions, and download the final results."
 )
 
 iface.launch(debug=True)
 
+
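A few notes and standalone sketches on the changes above. First, the Real-ESRGAN path can be exercised outside the app. A minimal sketch, assuming the RealESRGAN package imported in the diff (the load_weights/predict API published in the ai-forever/Real-ESRGAN repo) and a hypothetical local file input.jpg:

import numpy as np
import torch
from PIL import Image
from RealESRGAN import RealESRGAN

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build a 4x upscaler and let it download weights on first use, as the app does
model = RealESRGAN(device, scale=4)
model.load_weights('weights/RealESRGAN_x4.pth', download=True)

# Mirror enhance_image() from the diff: NumPy array in, upscaled image out
image_np = np.array(Image.open('input.jpg').convert('RGB'))
result = model.predict(image_np)
Image.fromarray(np.uint8(result)).save('output_4x.jpg')

Loading all three scales eagerly at startup, as the diff does, trades memory and launch time for fast per-request switching; loading each scale lazily on first use would start faster at the cost of a slow first request.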
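generate_caption() is standard transformers usage and can be sanity-checked in isolation. A minimal sketch, assuming a hypothetical local photo.jpg; the max_new_tokens argument is an addition here, not in the diff:

import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

image = Image.open('photo.jpg').convert('RGB')
inputs = processor(images=image, return_tensors="pt").to(device)
output_ids = model.generate(**inputs, max_new_tokens=30)  # bound caption length
print(processor.decode(output_ids[0], skip_special_tokens=True))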
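One caveat in muda_dpi(): Image.open(...).convert('RGB') returns a new image that does not carry the filename attribute, so image.save(image.filename, ...) can raise AttributeError, and any DPI written there is discarded when process_images() later re-saves the image without a dpi argument. A minimal sketch of a variant that takes an explicit output path (the path parameter is new; the dpi keyword to PIL's JPEG writer is standard):

from PIL import Image

def muda_dpi(image: Image.Image, dpi: float, path: str) -> Image.Image:
    # Embed the DPI in the JPEG metadata at an explicit destination;
    # gr.Number delivers floats, so coerce to int for the writer
    image.save(path, format='JPEG', dpi=(int(dpi), int(dpi)))
    return image

Even then, the DPI survives only if no later save overwrites the file; passing the same dpi pair to the final image.save in process_images() avoids that.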
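The file naming at the end of the loop splits temp_file.name on '/', which breaks on Windows paths, and the NamedTemporaryFile handle is still open when os.rename runs, which also fails on Windows. A portable sketch of the same idea (save_named is a hypothetical helper, not in the diff):

import os
import tempfile
from PIL import Image

def save_named(image: Image.Image, index: int) -> str:
    # Write straight to a predictable name in the temp directory,
    # skipping the NamedTemporaryFile-then-rename dance entirely
    final_path = os.path.join(tempfile.gettempdir(), f"Image_Captioning_with_BLIP_{index + 1}.jpg")
    image.save(final_path, format='JPEG')
    return final_path

Note that fixed names in a shared temp directory can collide across concurrent users; a tempfile.mkdtemp() per request would isolate them.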
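Lastly, the interface function can be smoke-tested without launching the UI, since Gradio simply calls fn with one value per input component. A sketch with hypothetical local files a.jpg and b.jpg:

# Caption two files with every optional step disabled
images, paths, captions = process_images(
    ['a.jpg', 'b.jpg'],
    False, '2x',      # enhance, scale
    False, 300,       # adjust_dpi, dpi
    False, 512, 512,  # resize, width, height
)
print(captions)

Because the third output is a single gr.Textbox, the captions list is stringified as-is; returning "\n".join(captions) instead would render one caption per line.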