dschandra committed · Commit 4942d79 · verified · 1 Parent(s): ea1dace

Update app.py

Files changed (1): app.py (+44 −18)
app.py CHANGED
@@ -1,34 +1,56 @@
 import gradio as gr
 from transformers import BlipProcessor, BlipForConditionalGeneration
+from transformers import DetrImageProcessor, DetrForObjectDetection
+import torch
 
-# Load the BLIP model and processor from Hugging Face
+# Load BLIP model for captioning
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
+# Load DETR model for object detection
+detr_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
+detr_model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
+
 def generate_caption(image):
-    # Process the image
+    # Generate a caption with BLIP
     inputs = processor(images=image, return_tensors="pt")
-
-    # Generate caption using BLIP model
     out = model.generate(**inputs)
-
-    # Decode the output into a string
     caption = processor.decode(out[0], skip_special_tokens=True)
 
-    # Custom description to match the theme of surroundings
-    custom_description = """
-    A tropical escape where the azure waves meet the golden sand, sheltered by palm trees and embraced by the distant hills.
-    A place to unwind, breathe, and reconnect with nature.
-    """
-
-    return caption + "\n" + custom_description
-
-# Create the Gradio interface
+    # Detect objects in the image with DETR
+    detr_inputs = detr_processor(images=image, return_tensors="pt")
+    with torch.no_grad():
+        outputs = detr_model(**detr_inputs)
+
+    # Keep only detections above a confidence threshold
+    target_sizes = torch.tensor([image.size[::-1]])
+    results = detr_processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
+
+    # Map label IDs to class names via the model config. DETR is trained on
+    # COCO, which has no "tree"/"water"/"mountain" classes, so the description
+    # lists whichever COCO objects are actually detected.
+    detected_objects = []
+    for label in results["labels"]:
+        name = detr_model.config.id2label[label.item()]
+        if name not in detected_objects:
+            detected_objects.append(name)
+
+    # Custom dynamic description based on detected objects
+    description = "This image includes "
+    if detected_objects:
+        description += ", ".join(detected_objects)
+    else:
+        description += "various elements of nature"
+    description += ". It provides a beautiful view that invites relaxation and exploration."
+
+    return caption + "\n" + description
+
+# Gradio interface
 iface = gr.Interface(fn=generate_caption,
                      inputs=gr.Image(type="pil"),
                      outputs=gr.Textbox(),
-                     title="Image Caption Generator",
-                     description="Upload an image and get a description with the surroundings of the image.")
+                     title="Dynamic Image Caption Generator",
+                     description="Upload any image and get a detailed description of its contents.")
 
 if __name__ == "__main__":
     iface.launch()
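
A caveat on the detection labels in this commit: facebook/detr-resnet-50 is trained on COCO, whose classes cover everyday objects (person, car, dog, ...) but no scenery terms, so numeric IDs such as 23 or 72 map to COCO classes rather than to "tree" or "mountain". A minimal sanity check, assuming only the same checkpoint already used in app.py, shows which names the detector can actually report:

from transformers import DetrForObjectDetection

# Load the checkpoint used in app.py and inspect its id -> label map,
# which transformers stores on the model config.
detr_model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
print(detr_model.config.id2label[23])                    # a COCO class ("bear"), not "tree"
print(sorted(set(detr_model.config.id2label.values())))  # every label name in the config

Since none of these names are scenery words, building the description from config.id2label keeps the app's output consistent with what DETR can actually detect; any tropical-beach flavor has to come from the BLIP caption itself.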