"""Daily Progress Report (DPR) generator.

Captions uploaded construction-site photos with BLIP, extracts
construction-related keywords from each caption, and renders the result
into a single PDF report served through a Gradio interface.
"""

import string
from datetime import datetime

import gradio as gr
import torch
from PIL import Image
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from transformers import BlipForConditionalGeneration, BlipProcessor

# Load the BLIP captioning model/processor once at import time.
# Inference-only usage, so the model stays in eval mode.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
model.eval()
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Keyword vocabulary used to classify words found in generated captions.
construction_terms = {
    "activities": ["pouring", "scaffolding", "building", "excavation", "piling",
                   "digging", "cementing", "welding", "cutting", "assembling",
                   "drilling"],
    "materials": ["concrete", "steel", "wood", "bricks", "cement", "sand",
                  "mortar", "rebar", "plaster", "tiles"],
    "progress": ["completed", "ongoing", "in-progress", "starting", "finished",
                 "under construction"],
}


def detect_construction_info(caption):
    """Extract activity/material/progress keywords from a caption.

    Args:
        caption: Free-text image caption produced by the BLIP model.

    Returns:
        A three-line summary string:
        "Activities: ...\nMaterials: ...\nProgress: ...".
    """
    activity_found = []
    material_found = []
    progress_found = []
    caption_lower = caption.lower()

    for word in caption.split():
        # BUG FIX: strip surrounding punctuation so e.g. "pouring," or
        # "concrete." still matches the vocabulary.
        word_lower = word.lower().strip(string.punctuation)
        if word_lower in construction_terms["activities"]:
            activity_found.append(word)
        elif word_lower in construction_terms["materials"]:
            material_found.append(word)
        elif word_lower in construction_terms["progress"]:
            progress_found.append(word)

    # BUG FIX: multi-word progress terms ("under construction") can never
    # equal a single split token; match them as substrings of the caption.
    for term in construction_terms["progress"]:
        if " " in term and term in caption_lower:
            progress_found.append(term)

    activity_str = ", ".join(activity_found) if activity_found else "No specific activities detected."
    material_str = ", ".join(material_found) if material_found else "No materials detected."
    progress_str = ", ".join(progress_found) if progress_found else "No progress information available."

    return f"Activities: {activity_str}\nMaterials: {material_str}\nProgress: {progress_str}"


def _caption_image(path):
    """Open the image at *path*, caption it with BLIP, and return the
    construction-keyword summary for that caption."""
    image = Image.open(path)
    if image.mode != "RGB":
        image = image.convert("RGB")
    # BUG FIX: the model weights are loaded in float32, so casting the
    # inputs to float16 caused a dtype mismatch (and fails outright on CPU).
    # Move inputs to the model's device only, without changing dtype.
    inputs = processor(image, return_tensors="pt").to(device)
    with torch.no_grad():  # inference only — no autograd bookkeeping needed
        output = model.generate(**inputs, max_new_tokens=50)
    caption = processor.decode(output[0], skip_special_tokens=True)
    return detect_construction_info(caption)


def generate_dpr(files):
    """Build a PDF daily progress report from the uploaded site photos.

    Args:
        files: Sequence of uploads from ``gr.Files``; each element is a
            path string (``type="filepath"``) or a tempfile-like object.

    Returns:
        Path of the generated PDF ("dpr_report.pdf").
    """
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    dpr_text = [f"Daily Progress Report\nGenerated on: {current_time}\n"]

    for file in files:
        # BUG FIX: gr.Files(type="filepath") yields plain path strings,
        # which have no .name attribute; support both shapes.
        path = file if isinstance(file, str) else file.name
        detailed_caption = _caption_image(path)
        dpr_text.append(f"\nImage: {path}\n{detailed_caption}\n")

    pdf_path = "dpr_report.pdf"
    c = canvas.Canvas(pdf_path, pagesize=letter)
    c.drawString(100, 750, "Daily Progress Report")
    c.drawString(100, 730, f"Generated on: {current_time}")

    y_position = 700
    for section in dpr_text:
        # BUG FIX: drawString does not interpret "\n" — render each line
        # separately, paginating before a line would fall off the page.
        for line in section.split("\n"):
            if y_position < 100:
                c.showPage()
                y_position = 750
            c.drawString(100, y_position, line)
            y_position -= 20
        y_position -= 10  # extra gap between image sections
    c.save()
    return pdf_path


# Gradio interface: batch-upload site photos, download the PDF report.
iface = gr.Interface(
    fn=generate_dpr,
    inputs=gr.Files(type="filepath", label="Upload Site Photos"),
    outputs="file",
    title="Daily Progress Report Generator",
    description="Upload up to 10 site photos. The AI model will detect construction activities, materials, and progress and generate a PDF report.",
    allow_flagging="never",  # Optional: Disable flagging
)

iface.launch()