abiabidali committed on
Commit fc24e3f · verified · 1 Parent(s): acf7f61

Update app.py

Files changed (1)
app.py +10 -80
app.py CHANGED
@@ -1,7 +1,6 @@
 import torch
 from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
-from RealESRGAN import RealESRGAN
 import gradio as gr
 import numpy as np
 import tempfile
@@ -10,102 +9,33 @@ import os
 # Set device to GPU if available, otherwise use CPU
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-# Load the BLIP model and processor once
+# Load the BLIP model and processor
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
 
-# Load the Real-ESRGAN models
-model2 = RealESRGAN(device, scale=2)
-model4 = RealESRGAN(device, scale=4)
-model8 = RealESRGAN(device, scale=8)
-
-# Load weights for all models at the start to avoid reloading during processing
-model2.load_weights('weights/RealESRGAN_x2.pth', download=True)
-model4.load_weights('weights/RealESRGAN_x4.pth', download=True)
-model8.load_weights('weights/RealESRGAN_x8.pth', download=True)
-
-# Function to enhance image resolution
-def enhance_image(image, scale):
-    image_np = np.array(image.convert('RGB'))
-    if scale == '2x':
-        result = model2.predict(image_np)
-    elif scale == '4x':
-        result = model4.predict(image_np)
-    else:
-        result = model8.predict(image_np)
-    return Image.fromarray(np.uint8(result))
-
 # Function to generate caption for the image using BLIP
 def generate_caption(image):
     inputs = processor(images=image, return_tensors="pt").to(device)
     output_ids = model.generate(**inputs)
     return processor.decode(output_ids[0], skip_special_tokens=True)
 
-# Function to adjust DPI of the image
-def muda_dpi(image, dpi):
-    image.save(image.filename, format='JPEG', dpi=(dpi, dpi))
-    return image
-
-# Function to resize the image
-def resize_image(image, width, height):
-    return image.resize((width, height))
-
-# Main function to process images
-def process_images(image_files, enhance, scale, adjust_dpi, dpi, resize, width, height):
-    processed_images = []
-    file_paths = []
+# Function to process images and generate captions
+def process_images(image_files):
     captions = []
-
-    for i, image_file in enumerate(image_files):
+    for image_file in image_files:
         image = Image.open(image_file).convert('RGB')
-
-        # Enhance resolution if required
-        if enhance:
-            image = enhance_image(image, scale)
-
-        # Adjust DPI if required
-        if adjust_dpi:
-            image = muda_dpi(image, dpi)
-
-        # Resize if required
-        if resize:
-            image = resize_image(image, width, height)
-
-        # Generate caption
         caption = generate_caption(image)
         captions.append(caption)
-
-        # Save the processed image
-        custom_filename = f"Image_Captioning_with_BLIP_{i+1}.jpg"
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
-        image.save(temp_file.name, format='JPEG')
-        final_path = temp_file.name.replace(temp_file.name.split('/')[-1], custom_filename)
-        os.rename(temp_file.name, final_path)
-        file_paths.append(final_path)
-        processed_images.append(image)
-
-    return processed_images, file_paths, captions
+    return captions
 
 # Setup Gradio interface
 iface = gr.Interface(
     fn=process_images,
-    inputs=[
-        gr.Files(label="Upload Image Files"),
-        gr.Checkbox(label="Enhance Images (ESRGAN)"),
-        gr.Radio(['2x', '4x', '8x'], type="value", value='2x', label='Resolution model'),
-        gr.Checkbox(label="Adjust DPI"),
-        gr.Number(label="DPI", value=300),
-        gr.Checkbox(label="Resize"),
-        gr.Number(label="Width", value=512),
-        gr.Number(label="Height", value=512)
-    ],
-    outputs=[
-        gr.Gallery(label="Final Images"),
-        gr.Files(label="Download Final Images"),
-        gr.Textbox(label="Image Captions")
-    ],
-    title="High-Quality Image Enhancer with Fast Processing",
-    description="Upload multiple images (.jpg, .png), enhance using AI, adjust DPI, resize, generate captions, and download the final results."
+    inputs=[gr.Files(label="Upload Image Files")],
+    outputs=[gr.Textbox(label="Image Captions")],
+    title="Image Captioning with BLIP",
+    description="Upload images and generate captions using the BLIP model from Hugging Face."
 )
 
 iface.launch(debug=True)
+
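After this commit the Space drops the Real-ESRGAN enhancement, DPI, and resize paths and becomes caption-only. For readers who want to exercise the remaining captioning core outside Gradio, here is a minimal standalone sketch; the checkpoint name and generation flow are taken from the diff above, while the image path is a hypothetical placeholder:

import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Same BLIP checkpoint as in app.py
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

# "photo.jpg" is a placeholder; point it at any local image
image = Image.open("photo.jpg").convert('RGB')
inputs = processor(images=image, return_tensors="pt").to(device)
output_ids = model.generate(**inputs)
print(processor.decode(output_ids[0], skip_special_tokens=True))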