Kelex83 commited on
Commit
073fe88
·
verified ·
1 Parent(s): 049cfb9

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +70 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import numpy as np
from PIL import Image
import requests
from io import BytesIO
from transformers import AutoProcessor, BlipForConditionalGeneration

# Load the pretrained BLIP processor and captioning model once at import
# time so every request reuses the same in-memory weights instead of
# re-downloading/reloading them per call.
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
def fetch_image(url: str) -> np.ndarray:
    """Download the image at *url* and return it as an RGB numpy array.

    Args:
        url: HTTP(S) address of an image resource.

    Returns:
        The decoded image as a ``numpy.ndarray`` in RGB channel order.

    Raises:
        ValueError: if the download fails, the server returns an error
            status, or the payload is not a decodable image.
    """
    try:
        # Bounded timeout so an unreachable host cannot hang the UI forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # Decode from the fully downloaded body. The previous
        # Image.open(response.raw) read the raw socket stream, which skips
        # requests' transparent content-decoding (e.g. gzip) and can hand
        # PIL undecodable bytes; BytesIO over response.content is safe.
        image = Image.open(BytesIO(response.content)).convert('RGB')
        return np.array(image)
    except Exception as e:
        raise ValueError(f"Failed to fetch image: {str(e)}")
def caption_image(input_image=None, image_url=None):
    """Generate a caption for an uploaded image or an image fetched by URL.

    Args:
        input_image: numpy array delivered by the Gradio image widget,
            or None when nothing was uploaded.
        image_url: optional URL string; when non-empty it takes
            precedence over ``input_image``.

    Returns:
        ``(caption, caption_path)`` on success — the caption text and the
        path of a ``.txt`` file holding it — or ``("Error: ...", None)``
        on failure, which Gradio renders in the same two outputs.
    """
    try:
        # URL wins when both inputs are supplied.
        if image_url:
            image_array = fetch_image(image_url)
        elif input_image is not None:
            image_array = input_image
        else:
            raise ValueError("Please provide either an image or an image URL.")

        # Normalize to an RGB PIL image regardless of the source format.
        pil_image = Image.fromarray(image_array).convert('RGB')

        # Preprocess the image and generate a bounded-length caption.
        inputs = processor(pil_image, return_tensors="pt")
        out = model.generate(**inputs, max_length=50)
        caption = processor.decode(out[0], skip_special_tokens=True)

        # Persist the caption so the UI can offer it as a download.
        # Explicit UTF-8 avoids the platform-dependent default codec
        # mangling non-ASCII caption text on some systems.
        caption_path = "caption.txt"
        with open(caption_path, "w", encoding="utf-8") as f:
            f.write(caption)

        return caption, caption_path
    except Exception as e:
        # Surface the failure in the caption textbox instead of crashing
        # the Gradio worker; the file output stays empty.
        return f"Error: {str(e)}", None
# Build and launch the Gradio UI. Each `examples` row must supply one
# value per declared input component; the original rows listed only the
# image path, which Gradio rejects for a two-input interface.
iface = gr.Interface(
    fn=caption_image,
    inputs=[
        gr.Image(type="numpy", label="Upload Image"),
        gr.Textbox(label="Image URL (Optional)", placeholder="Enter an image URL here")
    ],
    outputs=[
        gr.Textbox(label="Generated Caption"),
        gr.File(label="Download Caption")
    ],
    examples=[
        ["model.jpg", ""],
        ["horse.jpeg", ""],
        ["panda.jpg", ""]
    ],
    title="Advanced Image Captioning with the BLIP model",
    description="Upload an image or provide a URL to generate a caption. Download the generated caption as a .txt file.",
    # NOTE(review): live=True re-runs the BLIP model on every input edit
    # (including each keystroke in the URL box) — consider dropping it in
    # favor of the default Submit button for a model this heavy.
    live=True,
    theme="compact"
)

iface.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==3.29.0
2
+ transformers
3
+ torch
4
+ Pillow
5
+ requests