from fastapi import FastAPI
import uvicorn

import pandas as pd
import numpy as np
import pickle
import rasterio
import h5py
from skimage.morphology import disk
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

app = FastAPI()


# Endpoints
# Root endpoint
@app.get("/")
def root():
    return {"API": "Hail API 2020"}
    
def geocode_address(address):
    """Geocode a street address to (lat, lon) via the Census Bureau geocoder,
    falling back to Nominatim if that lookup fails."""
    try:
        # Primary: US Census Bureau one-line address geocoder
        address2 = address.replace(' ', '+').replace(',', '%2C')
        df = pd.read_json(
            f'https://geocoding.geo.census.gov/geocoder/locations/onelineaddress?address={address2}&benchmark=2020&format=json')
        results = df.iloc[:1, 0][0][0]['coordinates']
        lat, lon = results['y'], results['x']
    except Exception:
        # Fallback: Nominatim, rate-limited out of courtesy to the public API
        geolocator = Nominatim(user_agent='GTA Lookup')
        geocode = RateLimiter(geolocator.geocode, min_delay_seconds=2)
        location = geocode(address)
        lat, lon = location.latitude, location.longitude

    return lat, lon

def get_hail_data(address, start_date, end_date, radius_miles, get_max):

    start_date = pd.Timestamp(str(start_date)).strftime('%Y%m%d')
    end_date = pd.Timestamp(str(end_date)).strftime('%Y%m%d')
    date_range_days = pd.date_range(start_date, end_date)
    # Every calendar year touched by the request (daily frequency, so ranges
    # that cross a year boundary without hitting a month-end are not missed)
    years = sorted({d.year for d in date_range_days})

    if len(years) == 0:
        years = [pd.Timestamp(start_date).year]
        
    # Geocode address
    lat, lon = geocode_address(address)

    # Convert lat/lon to row & col on the MRMS grid
    with open('Data/transform_mrms.pkl', 'rb') as f:
        transform = pickle.load(f)

    row, col = rasterio.transform.rowcol(transform, lon, lat)

    files = [
        'Data/2023_hail.h5',
        'Data/2022_hail.h5',
        'Data/2021_hail.h5',
        'Data/2020_hail.h5'
    ]

    # Keep only the yearly files that overlap the requested date range
    files_chosen = [i for i in files if any(str(j) in i for j in years)]

    # Query and Collect H5 Data
    all_data = []
    all_dates = []
    for file in files_chosen:
        with h5py.File(file, 'r') as f:
            # Get Dates from H5
            dates = f['dates'][:]
            date_idx = np.where((dates >= int(start_date))
                                & (dates <= int(end_date)))[0]

            # Select Data by Date and Radius
            dates = dates[date_idx]
            data = f['hail'][date_idx,
                             row - radius_miles:row + radius_miles + 1,
                             col - radius_miles:col + radius_miles + 1]

            all_data.append(data)
            all_dates.append(dates)

    data_all = np.vstack(all_data)
    dates_all = np.concatenate(all_dates)

    # Clip negative values to zero and convert to inches (x 0.0393701, mm -> in)
    data_mat = np.where(data_all < 0, 0, data_all) * 0.0393701

    # Mask the square window down to a circular radius; cells outside the
    # disk are flagged with -1
    disk_mask = disk(radius_miles).astype(bool)
    data_mat = np.where(disk_mask, data_mat, -1).round(3)

    # Build the output DataFrame
    # Daily maximum within the radius
    if get_max:
        data_max = np.max(data_mat, axis=(1, 2))
        df_data = pd.DataFrame({'Date': dates_all, 'Hail_max': data_max})
    # Full grid for each day
    else:
        data_all = list(data_mat)
        df_data = pd.DataFrame({'Date': dates_all, 'Hail_all': data_all})

    df_data['Date'] = pd.to_datetime(df_data['Date'], format='%Y%m%d')
    df_data = df_data.set_index('Date')

    # Fill days with no records as 0 and format the dates for output
    df_data = (df_data.reindex(date_range_days, fill_value=0)
                      .reset_index()
                      .rename(columns={'index': 'Date'}))
    df_data['Date'] = df_data['Date'].dt.strftime('%Y-%m-%d')

    return df_data


@app.get('/Hail_Docker_Data')
async def predict(address: str, start_date: str, end_date: str,
                  radius_miles: int, get_max: bool):
    try:
        results = get_hail_data(address, start_date,
                                end_date, radius_miles, get_max)
    except Exception:
        # Return a sentinel frame rather than a 500 so callers always get JSON back
        results = pd.DataFrame({'Date': ['error'], 'Hail_max': ['error']})

    return results.to_json()
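

# Optional local entry point: a minimal sketch for running the service during
# development. The host and port below are assumptions, not part of the
# original app (a Docker deployment would typically start uvicorn itself).
if __name__ == '__main__':
    # Example request once running (hypothetical address):
    # http://localhost:8000/Hail_Docker_Data?address=1600+Pennsylvania+Ave+NW%2C+Washington%2C+DC&start_date=2021-05-01&end_date=2021-06-01&radius_miles=3&get_max=true
    uvicorn.run(app, host='0.0.0.0', port=8000)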