Rammohan0504 committed on
Commit
6b0a154
·
verified ·
1 Parent(s): be1e7f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -13
app.py CHANGED
@@ -1,32 +1,48 @@
1
- from transformers import BlipProcessor, BlipForConditionalGeneration
2
- from PIL import Image
3
  import gradio as gr
4
  import torch
 
 
 
 
5
 
6
# Load the BLIP captioning processor and model once at import time, switch the
# model to evaluation mode, and move it to the GPU when CUDA is available.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
model.eval()

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
12
 
13
- # Inference function
14
def generate_caption(image):
    """Return a BLIP-generated caption for a single PIL image.

    Args:
        image: A PIL image. It is converted to RGB when it is in any other
            mode before being handed to the processor.

    Returns:
        The decoded caption string for the first generated sequence, with
        special tokens stripped.
    """
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Only move the inputs to the target device. The model is loaded without
    # any dtype override, so casting the inputs to float16 here would make
    # their dtype disagree with the model weights during generation.
    inputs = processor(image, return_tensors="pt").to(device)
    output = model.generate(**inputs, max_new_tokens=50)
    caption = processor.decode(output[0], skip_special_tokens=True)
    return caption
22
 
23
- # Gradio interface
 
 
 
 
 
 
 
24
# Single-image UI: one PIL image in, one caption string out. The app is
# launched as soon as this module is executed.
iface = gr.Interface(
    generate_caption,
    gr.Image(type="pil"),
    "text",
    title="Construction Site Image-to-Text Generator",
    description="Upload a site photo. The model will detect and describe construction activities.",
)

iface.launch()
 
 
 
 
1
  import gradio as gr
2
  import torch
3
+ import time
4
+ from PIL import Image
5
+ from transformers import BlipProcessor, BlipForConditionalGeneration
6
+ from utils import create_pdf
7
 
8
# Load the BLIP captioning processor and model once at import time and place
# the model on the GPU when CUDA is available, otherwise on the CPU.
_CHECKPOINT = "Salesforce/blip-image-captioning-base"

processor = BlipProcessor.from_pretrained(_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_CHECKPOINT)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
13
 
 
14
def generate_caption(image):
    """Caption a single PIL image with the module-level BLIP model.

    The image is converted to RGB when it is in any other mode, passed through
    ``processor`` and ``model.generate`` (at most 50 new tokens), and the first
    generated sequence is decoded with special tokens skipped.

    Returns:
        The caption string. When the whole call took longer than 5 seconds the
        caption is prefixed with a warning that reports the elapsed time
        rounded to two decimals.
    """
    started_at = time.time()

    rgb_image = image if image.mode == "RGB" else image.convert("RGB")

    model_inputs = processor(images=rgb_image, return_tensors="pt").to(device)
    token_ids = model.generate(**model_inputs, max_new_tokens=50)
    text = processor.decode(token_ids[0], skip_special_tokens=True)

    elapsed = time.time() - started_at
    if elapsed <= 5:
        return text
    # Surface slow inferences to the user directly in the caption text.
    return f"⚠️ Took {round(elapsed, 2)}s: {text}"
29
 
30
def _open_as_rgb(item):
    """Return *item* as an RGB PIL image, reading it from disk when needed."""
    import os

    if isinstance(item, Image.Image):
        return item
    # Uploads arrive either as a path or as a wrapper object that exposes the
    # path of the uploaded file as ``.name``.
    path = item if isinstance(item, (str, os.PathLike)) else item.name
    # Convert inside the context manager so the pixel data is fully loaded
    # before the underlying file handle is closed.
    with Image.open(path) as opened:
        return opened.convert("RGB")


def process_images(images):
    """Caption up to ten uploaded images and bundle the captions into a PDF.

    Args:
        images: The upload payload. May be ``None``/empty, a single item, or a
            list of items; each item may be a file path, an upload wrapper
            with a ``.name`` path attribute, or an already-loaded PIL image.

    Returns:
        A ``(summary_text, pdf_file)`` tuple: the per-image captions joined by
        blank lines, and whatever ``create_pdf`` returns for those captions.
        When nothing was uploaded, a short notice and ``None`` are returned
        and no PDF is created.
    """
    if not images:
        return "No images were uploaded.", None
    if not isinstance(images, (list, tuple)):
        # A single upload is delivered as one item instead of a list.
        images = [images]

    results = []
    for index, item in enumerate(images[:10], start=1):  # Limit to 10 images
        try:
            picture = _open_as_rgb(item)
        except OSError as exc:
            # One unreadable or corrupt upload should not abort the batch.
            results.append(f"Image {index}: could not be read ({exc})")
            continue
        results.append(f"Image {index}: {generate_caption(picture)}")

    pdf_file = create_pdf(results)
    return "\n\n".join(results), pdf_file
37
+
38
# Multi-image UI: a batch of uploaded site photos in, a text summary plus a
# downloadable PDF out. Flagging is disabled.
iface = gr.Interface(
    fn=process_images,
    inputs=gr.File(
        label="Upload up to 10 Site Images",
        # NOTE(review): "file" is the Gradio 3.x spelling; newer Gradio
        # releases expect "filepath" — confirm against the pinned version.
        type="file",
        file_types=[".jpg", ".png"],
        # gr.File has no `multiple` argument; selecting several files at once
        # is requested through `file_count`.
        file_count="multiple",
    ),
    outputs=["text", "file"],
    title="Auto-DPR Generator from Site Images",
    description="Upload construction site images. AI will auto-generate a progress summary and downloadable PDF.",
    allow_flagging="never",
)

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()