File size: 2,246 Bytes
5754a38
 
 
 
 
 
 
 
 
1a2035c
5754a38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import logging
import re
from io import BytesIO


def setup_logging():
    """Configure root logging for console output and return this module's logger.

    The root logger is configured through ``logging.basicConfig`` at INFO
    level with a timestamp/name/level/message format and a single stream
    handler (no file handler is attached).

    Returns:
        The logger registered under this module's ``__name__``.
    """
    console_handler = logging.StreamHandler()  # console is the only sink
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    logging.basicConfig(
        handlers=[console_handler],
        format=log_format,
        level=logging.INFO,
    )
    module_logger = logging.getLogger(__name__)
    return module_logger

def meters_to_miles(meters):
    """Return the given distance, expressed in meters, converted to miles.

    The conversion multiplies by a fixed factor of 0.000621371 miles per
    meter.
    """
    miles_per_meter = 0.000621371
    miles = meters * miles_per_meter
    return miles

def validate_excel_file(file_stream: BytesIO) -> tuple[bool, str]:
    """Check whether a stream starts with a recognised Excel file signature.

    Only the first four bytes are inspected; afterwards the stream is
    rewound to position 0 so the caller can read it from the start.

    Args:
        file_stream: Readable, seekable binary stream holding the upload.

    Returns:
        A ``(is_valid, message)`` pair. ``is_valid`` is True when the leading
        bytes match the ZIP signature (xlsx) or the Compound File signature
        (xls). Any exception raised while inspecting the stream is reported
        as ``(False, "Validation error: ...")`` instead of propagating.
    """
    excel_signatures = (
        b'\x50\x4B\x03\x04',  # ZIP archive (xlsx)
        b'\xD0\xCF\x11\xE0',  # Compound File (xls)
    )
    try:
        magic = file_stream.read(4)
        # Rewind so later consumers see the whole file.
        file_stream.seek(0)

        if magic in excel_signatures:
            return True, "Valid Excel file"
        return False, "Invalid file type: Not an Excel file"
    except Exception as exc:
        return False, f"Validation error: {exc!s}"

# Upper-case street-type abbreviations and the full words they expand to.
_STREET_ABBREVIATIONS = {
    "ST": "STREET",
    "AVE": "AVENUE",
    "RD": "ROAD",
    "BLVD": "BOULEVARD",
    "DR": "DRIVE",
}

# Match an abbreviation only as a whole token: it must start at a word
# boundary and be followed by a period (which is consumed), whitespace, a
# comma, or the end of the string. This keeps words that merely end in the
# same letters ("FIRST", "BIRD", "21ST") from being rewritten.
_STREET_ABBREVIATION_RE = re.compile(
    r"\b(" + "|".join(_STREET_ABBREVIATIONS) + r")(?:\.|(?=[\s,]|$))"
)


def clean_address(address):
    """Clean and standardize an address string.

    Runs of whitespace are collapsed to single spaces and leading/trailing
    whitespace is removed. Upper-case street-type abbreviations (ST, AVE, RD,
    BLVD, DR), optionally followed by a period, are then expanded to their
    full words. Matching is case-sensitive, so lower- or mixed-case
    abbreviations are left untouched.

    Abbreviations are expanded only when they form a whole token, so
    "FIRST AVE" becomes "FIRST AVENUE" rather than "FIRSTREET AVE", and an
    abbreviation at the very end of the address ("123 MAIN ST") or before a
    comma is expanded as well.

    Args:
        address: The raw address; any non-string value is treated as missing.

    Returns:
        The cleaned address, or an empty string when ``address`` is not a
        ``str``.
    """
    if not isinstance(address, str):
        return ""

    # str.split() with no argument splits on any whitespace run and drops
    # empty pieces, so this both trims and collapses whitespace.
    cleaned = " ".join(address.split())

    return _STREET_ABBREVIATION_RE.sub(
        lambda match: _STREET_ABBREVIATIONS[match.group(1)],
        cleaned,
    )

def handle_empty_values(df, required_columns):
    """Return a copy of ``df`` with missing values in required columns blanked.

    For every name in ``required_columns`` that is present in the frame,
    missing entries are replaced with an empty string via ``fillna("")``.
    Names not present in the frame are skipped, and the input frame itself
    is never modified.

    Args:
        df: The source DataFrame.
        required_columns: Iterable of column names to fill.

    Returns:
        A new DataFrame with the fills applied.
    """
    # Operate on a copy so the caller's frame stays untouched.
    result = df.copy()

    present = [name for name in required_columns if name in result.columns]
    for name in present:
        filled = result[name].fillna("")
        result[name] = filled

    return result