Rammohan0504 committed on
Commit
ef4e447
·
verified ·
1 Parent(s): 2e55271

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -17
app.py CHANGED
@@ -19,31 +19,37 @@ construction_terms = [
19
  ]
20
 
21
  # Inference function
22
- def generate_caption(image):
23
- if image.mode != "RGB":
24
- image = image.convert("RGB")
25
 
26
- # Preprocess the image and generate a caption
27
- inputs = processor(image, return_tensors="pt").to(device, torch.float16)
28
- output = model.generate(**inputs, max_new_tokens=50)
29
- caption = processor.decode(output[0], skip_special_tokens=True)
 
 
 
 
30
 
31
- # Filter the caption to only include construction-related terms
32
- filtered_caption = " ".join([word for word in caption.split() if word.lower() in construction_terms])
 
 
 
 
 
 
33
 
34
- # If no construction-related terms are found, return a default message
35
- if not filtered_caption:
36
- filtered_caption = "No construction-related activities detected."
37
-
38
- return filtered_caption
39
 
40
  # Gradio interface
41
  iface = gr.Interface(
42
- fn=generate_caption,
43
- inputs=gr.Image(type="pil"),
44
  outputs="text",
45
  title="Construction Site Image-to-Text Generator",
46
- description="Upload a site photo. The model will detect and describe construction activities and materials (e.g., concrete pouring, scaffolding, steel rods)."
 
47
  )
48
 
49
  iface.launch()
 
19
  ]
20
 
21
  # Inference function
22
def _as_image_list(images):
    """Normalize the callback input to a list of images.

    Accepts ``None`` (nothing uploaded), a single image object, or an
    iterable of image objects.
    """
    if images is None:
        return []
    # A single image exposes ``mode``/``convert`` (both are used below) and
    # is not iterable, so it has to be wrapped instead of looped over.
    if hasattr(images, "mode") and hasattr(images, "convert"):
        return [images]
    return list(images)


def generate_captions(images):
    """Caption each image and keep only the construction-related words.

    Args:
        images: ``None``, a single image, or an iterable of images. Every
            image must provide ``mode`` and ``convert`` (the objects Gradio
            hands over for ``gr.Image(type="pil")``).

    Returns:
        A list with one string per input image: the caption words found in
        ``construction_terms`` joined by spaces, or a fixed fallback message
        when no word matched. Empty list when there is nothing to caption.
    """
    captions = []

    for image in _as_image_list(images):
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Preprocess the image and generate a caption
        inputs = processor(image, return_tensors="pt").to(device, torch.float16)
        output = model.generate(**inputs, max_new_tokens=50)
        caption = processor.decode(output[0], skip_special_tokens=True)

        # Filter the caption to only include construction-related terms.
        # NOTE(review): matching is done per whitespace-separated word, so any
        # multi-word entry in construction_terms could never match - confirm
        # against the term list.
        matched_words = [
            word for word in caption.split() if word.lower() in construction_terms
        ]

        # If no construction-related terms are found, use a default message
        captions.append(
            " ".join(matched_words) or "No construction-related activities detected."
        )

    return captions


def _captions_as_text(image):
    """Gradio callback: caption the upload and render the result as plain text.

    ``generate_captions`` returns a list, while the "text" output shows a
    string; joining here avoids displaying the list's repr in the UI.
    """
    return "\n".join(generate_captions(image))


# Gradio interface
iface = gr.Interface(
    fn=_captions_as_text,
    # gr.Image has no `multiple` option and delivers one image per submission.
    # `tool` was removed from gr.Image in Gradio 4, and "editor" was already
    # the Gradio 3 default for uploads, so both keyword arguments are dropped.
    # NOTE(review): real multi-photo upload needs a multi-file component
    # (e.g. gr.File with file_count="multiple") plus opening each file as an
    # image before calling generate_captions - confirm the Gradio version first.
    inputs=gr.Image(type="pil", label="Upload Site Photo"),
    outputs="text",
    title="Construction Site Image-to-Text Generator",
    description="Upload a site photo. The model will detect and describe construction activities and materials (e.g., concrete pouring, scaffolding, steel rods).",
    allow_flagging="never",
)

iface.launch()