import logging
import re
from io import BytesIO

# Conversion factor used by meters_to_miles().
_MILES_PER_METER = 0.000621371

# First four bytes of the container formats accepted by validate_excel_file().
_EXCEL_SIGNATURES = (
    b'\x50\x4B\x03\x04',  # ZIP archive (xlsx)
    b'\xD0\xCF\x11\xE0',  # Compound File (xls)
)

# Upper-case street-type abbreviations and their expanded forms.
_ADDRESS_ABBREVIATIONS = {
    "ST": "STREET",
    "AVE": "AVENUE",
    "RD": "ROAD",
    "BLVD": "BOULEVARD",
    "DR": "DRIVE",
}

# Matches an abbreviation only when it stands alone as a word, together with
# an optional trailing period, so "EAST" or "FOREST" are never rewritten.
_ADDRESS_ABBREVIATION_RE = re.compile(
    r"\b(" + "|".join(_ADDRESS_ABBREVIATIONS) + r")\b\.?"
)


def setup_logging():
    """Configure root logging for console output and return this module's logger.

    Calls ``logging.basicConfig`` with level INFO, a timestamped format and a
    single ``StreamHandler``.

    Returns:
        The logger obtained from ``logging.getLogger(__name__)``.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[logging.StreamHandler()],  # Only console logging
    )
    return logging.getLogger(__name__)


def meters_to_miles(meters):
    """Convert a distance in meters to miles.

    Args:
        meters: Distance in meters.

    Returns:
        The distance multiplied by 0.000621371.
    """
    return meters * _MILES_PER_METER


def validate_excel_file(file_stream: BytesIO) -> tuple[bool, str]:
    """Check whether a stream starts with a known Excel container signature.

    Only the first four bytes are inspected, so any file sharing the ZIP or
    Compound File signature passes this check; it does not prove that the
    content is a workbook.

    Args:
        file_stream: Seekable binary stream holding the uploaded file. Its
            position is reset to 0 after the header has been read.

    Returns:
        A ``(is_valid, message)`` tuple. Any exception raised while reading
        is reported as ``(False, "Validation error: ...")`` rather than
        propagated.
    """
    try:
        # Read the first 4 bytes to check the file signature
        header = file_stream.read(4)
        file_stream.seek(0)  # Reset stream position for further processing

        if header in _EXCEL_SIGNATURES:
            return True, "Valid Excel file"
        return False, "Invalid file type: Not an Excel file"
    except Exception as e:
        return False, f"Validation error: {str(e)}"


def clean_address(address):
    """Collapse whitespace and expand street-type abbreviations in an address.

    Runs of whitespace are reduced to single spaces and leading/trailing
    whitespace is dropped. The upper-case abbreviations ST, AVE, RD, BLVD and
    DR (with or without a trailing period) are expanded to STREET, AVENUE,
    ROAD, BOULEVARD and DRIVE. Matching is case-sensitive and applies to
    whole words only, so words that merely end in an abbreviation (such as
    "EAST" or "FOREST") are left intact, and an abbreviation at the very end
    of the string is expanded as well.

    Args:
        address: The raw address value.

    Returns:
        The cleaned address, or an empty string when ``address`` is not a
        ``str``.
    """
    if not isinstance(address, str):
        return ""

    # Remove extra whitespace
    cleaned = " ".join(address.split())

    # Expand each stand-alone abbreviation; the optional period is consumed
    # by the pattern so "ST." becomes "STREET" without a stray dot.
    return _ADDRESS_ABBREVIATION_RE.sub(
        lambda match: _ADDRESS_ABBREVIATIONS[match.group(1)],
        cleaned,
    )


def handle_empty_values(df, required_columns):
    """Return a copy of ``df`` with missing values in required columns blanked.

    Args:
        df: Table to clean; presumably a pandas DataFrame (it must provide
            ``copy()``, ``columns`` and column-wise ``fillna``). The input is
            not modified.
        required_columns: Iterable of column names to process. Names that are
            not present in ``df.columns`` are skipped.

    Returns:
        The copy, with missing values in each present required column
        replaced by ``""``.
    """
    # Create a copy to avoid modifying the original DataFrame
    clean_df = df.copy()

    # Fill empty values with empty strings
    for col in required_columns:
        if col in clean_df.columns:
            clean_df[col] = clean_df[col].fillna("")

    return clean_df