Rammohan0504 committed on
Commit
4bdf8dd
·
verified ·
1 Parent(s): e65a9ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -28
app.py CHANGED
@@ -1,38 +1,32 @@
1
- import gradio as gr
2
  from transformers import BlipProcessor, BlipForConditionalGeneration
3
  from PIL import Image
4
- from fpdf import FPDF
5
- import os
6
- from datetime import datetime
7
 
 
8
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
9
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
 
 
10
 
11
- def analyze_image(image):
12
- raw_image = Image.fromarray(image)
13
- text = "Describe the construction site"
14
- inputs = processor(raw_image, text, return_tensors="pt")
15
- out = model.generate(**inputs)
16
- caption = processor.decode(out[0], skip_special_tokens=True)
17
-
18
- date_str = datetime.now().strftime("%Y-%m-%d")
19
- pdf = FPDF()
20
- pdf.add_page()
21
- pdf.set_font("Arial", size=12)
22
- pdf.multi_cell(0, 10, f"Daily Progress Report - {date_str}\n\nCaption: {caption}")
23
 
24
- os.makedirs("reports", exist_ok=True)
25
- file_path = f"reports/DPR_{date_str}.pdf"
26
- pdf.output(file_path)
27
- return caption, file_path
28
 
29
- demo = gr.Interface(
30
- fn=analyze_image,
31
- inputs=gr.Image(type="numpy", label="Upload Site Photo"),
32
- outputs=[gr.Textbox(label="Generated Caption"), gr.File(label="Download DPR PDF")],
33
- title="Auto DPR Generator",
34
- description="Upload a construction site image to generate a Daily Progress Report."
 
35
  )
36
 
37
- if __name__ == "__main__":
38
- demo.launch()
 
 
1
  from transformers import BlipProcessor, BlipForConditionalGeneration
2
  from PIL import Image
3
+ import gradio as gr
4
+ import torch
 
5
 
6
# Load the BLIP processor and captioning model once, at import time.
_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# Prefer the GPU when PyTorch reports one, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = BlipProcessor.from_pretrained(_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_CHECKPOINT)
model = model.to(device)
model.eval()  # evaluation mode; this script only runs inference
12
 
13
+ # Inference function
14
def generate_caption(image):
    """Generate a text caption for an uploaded image with the BLIP model.

    Args:
        image: A PIL image (the interface is configured with
            ``gr.Image(type="pil")``), or ``None`` when the form is
            submitted without an upload.

    Returns:
        The decoded caption string, or a short notice when no image was given.
    """
    # Gradio passes None when nothing was uploaded; bail out before
    # touching `image.mode`, which would raise AttributeError.
    if image is None:
        return "No image provided."

    # Convert any non-RGB image (grayscale, RGBA, palette, ...) to RGB.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Cast the inputs to the model's own device and dtype. The model is
    # loaded in its default precision, so forcing float16 here made the pixel
    # values disagree with the weights and generation failed with a
    # dtype-mismatch error.
    inputs = processor(image, return_tensors="pt").to(model.device, model.dtype)
    output = model.generate(**inputs, max_new_tokens=50)
    return processor.decode(output[0], skip_special_tokens=True)
22
 
23
# Build the Gradio UI: one image upload in, one text caption out.
iface = gr.Interface(
    generate_caption,
    gr.Image(type="pil"),
    "text",
    title="Construction Site Image-to-Text Generator",
    description="Upload a site photo. The model will detect and describe construction activities.",
)

# Start the web app as soon as the module is executed.
iface.launch()