Spaces:
Sleeping
Sleeping
import logging | |
import requests | |
import requests_cache | |
from geopy.geocoders import Nominatim | |
from utils.config import WEATHER_FILE, WEATHER_FILE_URL, TEST_FILE, TEST_FILE_URL, TRAIN_FILE, TRAIN_FILE_URL, ONE_WEEK_SEC | |
from cachetools import TTLCache, cached | |
from typing import List, Dict | |
from pathlib import Path | |
import time | |
import pandas as pd | |
# Show every column when a DataFrame is printed
pd.options.display.max_columns = None
# Print floats with 16 decimal places (high precision longitudes and latitudes)
pd.options.display.float_format = '{:.16f}'.format

# Install an in-memory HTTP cache whose entries expire after ONE_WEEK_SEC.
# Persistent alternative, currently disabled:
# requests_cache.install_cache('/client/requests_cache/yassir_requests_cache', expire_after=ONE_WEEK_SEC)
requests_cache.install_cache(expire_after=ONE_WEEK_SEC, backend='memory')

# Only ERROR and above are emitted, prefixed with timestamp and level
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.ERROR,
)
# Columns handed to pandas as ``parse_dates`` when reading the train/test files
parse_dates = ['Timestamp']

# Origin/destination coordinate columns are all read as 64-bit floats
dtype = dict.fromkeys(
    (
        'Origin_lat',
        'Origin_lon',
        'Destination_lat',
        'Destination_lon',
    ),
    'float64',
)

# Every listed weather measurement column is read as a 64-bit float
dtype_weather = dict.fromkeys(
    (
        'dewpoint_2m_temperature',
        'maximum_2m_air_temperature',
        'mean_2m_air_temperature',
        'mean_sea_level_pressure',
        'minimum_2m_air_temperature',
        'surface_pressure',
        'total_precipitation',
        'u_component_of_wind_10m',
        'v_component_of_wind_10m',
    ),
    'float64',
)
# Load CSV files | |
# @cached(cache=TTLCache(maxsize=100000, ttl=ONE_WEEK_SEC)) # Memory # change cache library | |
def get_data_df(file: Path, file_url: str, parse_dates: List[str], dtype: Dict[str, str]) -> pd.DataFrame:
    """Load a CSV into a DataFrame, preferring the URL and falling back to a local file.

    Args:
        file: Local path read when ``file_url`` cannot be loaded.
        file_url: Location tried first with ``pd.read_csv``.
        parse_dates: Column names passed to ``pd.read_csv`` as ``parse_dates``.
        dtype: Column-name to dtype mapping passed to ``pd.read_csv``.

    Returns:
        The DataFrame read from ``file_url``, or from ``file`` if that failed.

    Raises:
        Exception: Whatever ``pd.read_csv`` raises for the local ``file`` when
            both sources fail. Previously a ``return`` inside ``finally``
            swallowed that error and the function returned ``None``.
    """
    try:
        return pd.read_csv(file_url, parse_dates=parse_dates, dtype=dtype)
    except Exception as e:
        # Best-effort remote read: any failure (network, HTTP, parsing) falls
        # through to the local copy. Log first so the cause is recorded even
        # if the local read fails as well.
        logging.error(
            "Oops, the file is not available on the url, trying a local version: %s", e)
    # Outside the handler so a local failure propagates to the caller as-is.
    return pd.read_csv(file, parse_dates=parse_dates, dtype=dtype)
# @cached(cache=TTLCache(maxsize=100000, ttl=ONE_WEEK_SEC)) # Memory # unhassable dict. change cache library | |
def clean_df(df: pd.DataFrame) -> pd.DataFrame:
    """Lower-case every column label of ``df`` in place and return the same frame."""
    lowered_labels = list(map(lambda label: label.lower(), df.columns))
    df.columns = lowered_labels
    return df
# Load each dataset (URL first, local file as fallback) and lower-case its columns
test_df = clean_df(
    get_data_df(
        file=TEST_FILE,
        file_url=TEST_FILE_URL,
        parse_dates=parse_dates,
        dtype=dtype,
    )
)
train_df = clean_df(
    get_data_df(
        file=TRAIN_FILE,
        file_url=TRAIN_FILE_URL,
        parse_dates=parse_dates,
        dtype=dtype,
    )
)
# The weather file uses its own date column and dtype mapping
weather_df = clean_df(
    get_data_df(
        file=WEATHER_FILE,
        file_url=WEATHER_FILE_URL,
        parse_dates=['date'],
        dtype=dtype_weather,
    )
)
def time_sec_hms(sec: float) -> str:
    """Format ``sec`` seconds as a zero-padded ``HH:MM:SS`` string.

    The value is converted with ``time.gmtime``, so the hour field is an
    hour of day (00-23) and wraps for durations of 24 hours or more.
    """
    utc_fields = time.gmtime(sec)
    return time.strftime("%H:%M:%S", utc_fields)
def full_time_sec_hms(sec: float) -> str:
    """Format a duration in seconds as ``"<hours>h: <minutes>m: <seconds>s"``.

    Unlike a clock format, hours are not capped at 24 and are printed with a
    thousands separator.

    Args:
        sec: Duration in seconds; fractional values are rounded to the
            nearest whole second.

    Returns:
        The formatted duration, e.g. ``"1h: 1m: 1s"``.
    """
    # Round the total once, before splitting. Rounding each field separately
    # could show "60s" (3599.7 became "0h: 59m: 60s" instead of "1h: 0m: 0s").
    total_seconds = int(round(sec))
    hours, remainder = divmod(total_seconds, 3600)
    minutes, remaining_sec = divmod(remainder, 60)
    return f"{hours:,}h: {minutes}m: {remaining_sec}s"
# @cached(cache=TTLCache(maxsize=100000, ttl=ONE_WEEK_SEC)) # Memory | |
def get_country_geojson():
    """Find the country containing the training coordinates and fetch its GeoJSON.

    Origin and destination coordinates from ``train_df`` are stacked into one
    de-duplicated ``latitude``/``longitude`` frame. The (min lat, min lon) and
    (max lat, max lon) corners are reverse-geocoded with Nominatim; when both
    resolve to the same country, that country's outline is requested from the
    Nominatim search API.

    Returns:
        A ``(country, geojson, data)`` tuple: the country name, the
        ``geojson`` member of the first search result, and the coordinate
        DataFrame.

    Raises:
        ValueError: If either corner cannot be resolved to a country or the
            two corners resolve to different countries (previously this
            surfaced as an unbound ``country`` variable).
        RuntimeError: If the search request does not return HTTP 200 or
            returns no usable result (previously an unbound ``geojson``
            variable or an ``IndexError``).
    """
    origins = train_df[['origin_lat', 'origin_lon']].rename(
        columns={'origin_lat': 'latitude', 'origin_lon': 'longitude'})
    destinations = train_df[['destination_lat', 'destination_lon']].rename(
        columns={'destination_lat': 'latitude', 'destination_lon': 'longitude'})
    data = pd.concat([origins, destinations], ignore_index=True).drop_duplicates()

    # Initialize the Nominatim geocoder
    geolocator = Nominatim(user_agent="yassirAPP")

    def reverse_geocode(lat, lon):
        """Return the country name at (lat, lon), or '' when none is found."""
        location = geolocator.reverse((lat, lon), exactly_one=True)
        if location is None:
            return ''
        return location.raw.get('address', {}).get('country', '')

    # Reverse-geocode the two opposite corners of the coordinates' bounding box
    country_min = reverse_geocode(data['latitude'].min(), data['longitude'].min())
    country_max = reverse_geocode(data['latitude'].max(), data['longitude'].max())
    if not country_min or country_min != country_max:
        raise ValueError(
            "Could not determine a single country for the coordinates: "
            f"{country_min!r} vs {country_max!r}")
    country = country_min

    # Look the country up by name; the result is only used to report its bounding box
    location = geolocator.geocode(country, exactly_one=True)
    if location:
        bounding_box = location.raw['boundingbox']
        print(f"Bounding Box: {bounding_box}")

    # Ask the Nominatim search API for the country's polygon as GeoJSON
    url = "https://nominatim.openstreetmap.org/search"
    params = {
        'q': country,
        'format': 'json',
        'polygon_geojson': 1,  # Request GeoJSON polygons in the response
    }
    headers = {
        'User-Agent': 'yassirAPP',
    }
    # A timeout keeps an unresponsive server from hanging the caller forever
    response = requests.get(url, params=params, headers=headers, timeout=30)
    if response.status_code != 200:
        raise RuntimeError(
            f"Nominatim search for {country!r} failed with status {response.status_code}")

    results = response.json()
    if not results or 'geojson' not in results[0]:
        raise RuntimeError(f"Nominatim returned no GeoJSON for {country!r}")
    geojson = results[0]['geojson']
    return country, geojson, data