# Last commit not found
import json | |
from shapely.geometry import Polygon, Point | |
from shapely.ops import unary_union | |
import matplotlib.pyplot as plt | |
from matplotlib.patches import Polygon as MplPolygon | |
import numpy as np | |
import pandas as pd | |
def add_extreme_coordinates(polygon_data):
    """Store the lat/lon extremes of a feature's first ring on its geometry.

    Reads ``polygon_data["geometry"]["coordinates"][0]`` and writes the keys
    ``max_lat``, ``min_lat``, ``max_lon`` and ``min_lon`` into
    ``polygon_data["geometry"]`` in place. Column 0 of each coordinate is
    used as longitude and column 1 as latitude. Returns ``None``.
    """
    geometry = polygon_data["geometry"]
    ring = np.array(geometry["coordinates"][0])
    lons, lats = ring[:, 0], ring[:, 1]
    geometry["max_lat"] = max(lats)
    geometry["min_lat"] = min(lats)
    geometry["max_lon"] = max(lons)
    geometry["min_lon"] = min(lons)
def turn_into_dataframe(data):
    """Flatten a GeoJSON-like feature collection into a DataFrame.

    Every entry of ``data["features"]`` first gets its bounding extremes
    added (this mutates the input features). The ``properties`` and
    ``geometry`` dicts are then expanded into columns, ``coordinates`` is
    reduced to its first ring, and a ``polygon`` column holding a shapely
    ``Polygon`` built from that ring is appended.
    """
    features = data["features"]
    for feature in features:
        add_extreme_coordinates(feature)

    # The feature-level "type" column is dropped before flattening.
    df = pd.DataFrame(features).drop(columns="type")
    for nested_column in ("properties", "geometry"):
        flattened = pd.json_normalize(df[nested_column])
        # Swap the dict column for its flattened columns.
        df = df.drop(columns=[nested_column]).join(flattened)

    df["coordinates"] = df["coordinates"].apply(lambda rings: rings[0])
    df["polygon"] = df["coordinates"].apply(lambda ring: Polygon(ring))
    # This "type" column is the one produced by flattening the geometry dict.
    return df.drop(columns=["type"])
def plot_polygon(ax, polygon, color, label="label"):
    """Fill the exterior ring of ``polygon`` on ``ax``.

    Polygons whose ``is_empty`` flag is set are skipped. The fill uses the
    given ``color`` and ``label`` with an alpha of 0.5.
    """
    if polygon.is_empty:
        return
    xs, ys = polygon.exterior.xy
    ax.fill(xs, ys, color=color, alpha=0.5, label=label)
def plot_polygons(list_polygons, first_one_different=False, dpi=150):
    """Draw all polygons of ``list_polygons`` on one figure and show it.

    Args:
        list_polygons: Sequence of polygons accepted by ``plot_polygon``.
        first_one_different: When true, the first polygon is drawn in red
            and all others in blue; otherwise every polygon is blue.
        dpi: Resolution of the created figure.
    """
    # Create exactly one figure. A separate plt.figure(dpi=...) call followed
    # by plt.subplots() would leave an empty extra figure and the dpi would
    # not apply to the figure that is actually drawn on.
    _, ax = plt.subplots(dpi=dpi)

    # Enumerating the whole list keeps every label unique and makes an empty
    # list harmless even when first_one_different is set.
    for i, polygon in enumerate(list_polygons):
        color = "red" if first_one_different and i == 0 else "blue"
        plot_polygon(ax, polygon, color, f"polygon {i}")

    ax.set_aspect("equal")
    ax.set_title("Polygons and their Intersection")
    ax.set_ylabel("lat")
    ax.set_xlabel("lon")
    plt.show()
def plot_polygons_with_colors(list_polygons, list_colors, dpi=150):
    """Draw each polygon in its paired color on one figure and show it.

    Args:
        list_polygons: Sequence of polygons accepted by ``plot_polygon``.
        list_colors: Colors paired with the polygons by position; pairing
            stops at the shorter of the two sequences.
        dpi: Resolution of the created figure.
    """
    # Create exactly one figure. A separate plt.figure(dpi=...) call followed
    # by plt.subplots() would leave an empty extra figure and the dpi would
    # not apply to the figure that is actually drawn on.
    _, ax = plt.subplots(dpi=dpi)

    for polygon, color in zip(list_polygons, list_colors):
        plot_polygon(ax, polygon, color)

    ax.set_aspect("equal")
    ax.set_title("Polygons and their Intersection")
    ax.set_ylabel("lat")
    ax.set_xlabel("lon")
    plt.show()
def plot_polygons_from_df(df, dpi=150):
    """Plot the ``polygon`` column of ``df`` through ``plot_polygons``."""
    polygons = [row["polygon"] for _, row in df.iterrows()]
    plot_polygons(list_polygons=polygons, dpi=dpi)
def map_color(id):
    """Return the plot color for a polygon id; every id maps to "blue"."""
    color = "blue"
    return color
def plot_polygons_from_df_with_color(df, dpi=150):
    """Plot the polygons of ``df`` using colors derived from their ids.

    A ``plot_colors`` column, computed from ``id`` via ``map_color``, is
    written onto ``df`` itself before plotting.
    """
    df["plot_colors"] = df["id"].apply(map_color)

    pairs = [(row["polygon"], row["plot_colors"]) for _, row in df.iterrows()]
    polygons = [polygon for polygon, _ in pairs]
    colors = [color for _, color in pairs]

    plot_polygons_with_colors(list_polygons=polygons, list_colors=colors, dpi=dpi)
def intersection(polygon, polygon_comparison):
    """Return ``polygon.intersection(polygon_comparison)``."""
    overlap = polygon.intersection(polygon_comparison)
    return overlap
def intersection_area(polygon, polygon_comparison):
    """Return the ``area`` of the intersection of the two polygons."""
    shared_region = intersection(polygon, polygon_comparison)
    return shared_region.area
def intersection_area_ratio(polygon, polygon_comparison):
    """Return the share of ``polygon``'s area covered by ``polygon_comparison``.

    The result is the intersection area divided by ``polygon.area``. A
    polygon with zero area yields ``0.0`` instead of raising a division
    error, so a single degenerate geometry cannot abort a whole run; a
    ratio of zero never exceeds any positive overlap threshold.
    """
    own_area = polygon.area
    if own_area == 0:
        return 0.0
    return intersection_area(polygon, polygon_comparison) / own_area
def containsPoint(polygonB, polygon):
    """Return True if ``polygon`` contains any exterior vertex of ``polygonB``.

    Each vertex of ``polygonB``'s exterior ring is wrapped in a ``Point`` and
    tested with ``polygon.contains``. All vertices are checked, not only the
    first, and ``False`` is returned when none matches or when there are no
    vertices at all.
    """
    return any(
        polygon.contains(Point(coord)) for coord in get_coordinates(polygonB)
    )
def get_coordinates(polygon):
    """Return the exterior ring of ``polygon`` as a list of coordinate lists."""
    return [list(pair) for pair in polygon.exterior.coords]
def mark_id_to_be_dropped(df, id_string):
    """Set ``to_drop`` to True, in place, on every row whose ``id`` equals ``id_string``."""
    matches = df["id"] == id_string
    df.loc[matches, "to_drop"] = True
def mark_id_to_be_merged(df, id_string):
    """Set ``to_merge`` to True, in place, on every row whose ``id`` equals ``id_string``."""
    matches = df["id"] == id_string
    df.loc[matches, "to_merge"] = True
def calc_overlapping_subset(df_input, index):
    """Return the rows whose bounding box overlaps that of row ``index``.

    ``index`` is a position (used with ``iloc``). A row matches when, on both
    the latitude and the longitude axis, one end of either box lies strictly
    inside the other box's range. Because every comparison is strict, the
    reference row itself never matches, and neither does a row whose range on
    an axis is exactly equal to the reference range.
    """
    reference = df_input.iloc[index]
    max_lat, min_lat = reference["max_lat"], reference["min_lat"]
    max_lon, min_lon = reference["max_lon"], reference["min_lon"]

    def axis_overlap(low, high, other_low, other_high):
        # One end of the reference range lies strictly inside the other range.
        reference_end_inside = ((other_low < high) & (high < other_high)) | (
            (other_low < low) & (low < other_high)
        )
        # One end of the other range lies strictly inside the reference range.
        other_end_inside = ((low < other_high) & (other_high < high)) | (
            (low < other_low) & (other_low < high)
        )
        return reference_end_inside | other_end_inside

    lat_overlap = axis_overlap(
        min_lat, max_lat, df_input["min_lat"], df_input["max_lat"]
    )
    lon_overlap = axis_overlap(
        min_lon, max_lon, df_input["min_lon"], df_input["max_lon"]
    )
    return df_input.loc[lat_overlap & lon_overlap]
def remove_contained_poylgons(df_input, threshold=0.85):
    """Drop polygons that are largely covered by another polygon.

    For each polygon, the candidates returned by ``calc_overlapping_subset``
    are compared pairwise. When the intersection covers more than
    ``threshold`` of either polygon's area, the smaller of the two is marked
    in ``to_drop``. Marked rows are removed at the end.

    Args:
        df_input: DataFrame with ``id``, ``polygon`` and the bounding-box
            columns; it is not modified.
        threshold: Overlap ratio above which one of two polygons is dropped.
            Defaults to the previously hard-coded 0.85.

    Returns:
        A DataFrame containing only the rows whose ``to_drop`` is False.
    """
    df_result = df_input.copy()
    # Without the flag column the final filter cannot work; create it here
    # so callers do not have to prepare it.
    if "to_drop" not in df_result.columns:
        df_result["to_drop"] = False

    for i in range(len(df_result)):
        current = df_input.iloc[i]
        polygon_a = current["polygon"]
        candidates = calc_overlapping_subset(df_input=df_result, index=i)

        for _, other in candidates.iterrows():
            polygon_b = other["polygon"]
            ratio_current_choice = intersection_area_ratio(
                polygon=polygon_a, polygon_comparison=polygon_b
            )
            ratio_alternative_choice = intersection_area_ratio(
                polygon=polygon_b, polygon_comparison=polygon_a
            )
            if not (
                ratio_current_choice > threshold
                or ratio_alternative_choice > threshold
            ):
                continue

            area_a, area_b = polygon_a.area, polygon_b.area
            # With exactly equal areas, "drop the one that is not larger"
            # would drop both polygons (once from each side of the pair).
            # Break the tie by id so exactly one of them survives.
            keep_current = area_a > area_b or (
                area_a == area_b and str(current["id"]) < str(other["id"])
            )
            loser_id = other["id"] if keep_current else current["id"]
            mark_id_to_be_dropped(df=df_result, id_string=loser_id)

    # Remove all polygons whose id was marked.
    return df_result.loc[df_result["to_drop"] == False]  # noqa: E712
def merge(df_input, polygon_index, merge_subset):
    """Merge every polygon of ``merge_subset`` into one target polygon.

    The target is the row at position ``polygon_index`` of ``df_input``. For
    each row of ``merge_subset`` the target's polygon is replaced by the
    exterior ring of the union, its coordinates and bounding-box columns are
    recomputed, its ``Confidence_score`` becomes the mean of the current
    score and the merged row's score, and the merged row is removed.

    Args:
        df_input: DataFrame holding all polygons. The target row is updated
            through ``.loc`` assignments on this frame.
        polygon_index: Position of the target row in the frame as passed in.
        merge_subset: Rows to merge into the target.

    Returns:
        The DataFrame with the target updated and the merged rows removed.
    """
    # Resolve the target once by id. Rows are deleted inside the loop, so the
    # position ``polygon_index`` can point at a different row afterwards.
    target_id = df_input.iloc[polygon_index]["id"]

    for _, other in merge_subset.iterrows():
        if other["id"] == target_id:
            # Never merge the target into itself; that would delete it.
            continue

        is_target = df_input["id"] == target_id
        target = df_input.loc[is_target].iloc[0]

        # Only the exterior ring of the union is kept.
        # NOTE(review): assumes the union is a single polygon exposing
        # ``.exterior`` - confirm for inputs that merely touch.
        union = target["polygon"].union(other["polygon"])
        ring = list(union.exterior.coords)
        merged_polygon = Polygon(ring)

        lons = [point[0] for point in ring]
        lats = [point[1] for point in ring]
        averaged_score = (target["Confidence_score"] + other["Confidence_score"]) / 2

        # Update the target row.
        df_input.loc[is_target, "polygon"] = merged_polygon
        df_input.loc[is_target, "min_lon"] = min(lons)
        df_input.loc[is_target, "max_lon"] = max(lons)
        df_input.loc[is_target, "min_lat"] = min(lats)
        df_input.loc[is_target, "max_lat"] = max(lats)
        df_input.loc[is_target, "Confidence_score"] = averaged_score
        df_input.loc[is_target, "coordinates"] = df_input.loc[
            is_target, "polygon"
        ].apply(get_coordinates)

        # Remove the polygon that has just been absorbed.
        df_input = df_input.loc[df_input["id"] != other["id"]]

    return df_input
def merge_overlapping(df_input, threshold=0.40):
    """Perform at most one merge step on ``df_input``.

    Polygons are scanned in order. For the first polygon that has at least
    one candidate (from ``calc_overlapping_subset``) whose intersection
    covers more than ``threshold`` of either polygon's area, all such
    candidates are merged into it via ``merge`` and the function returns.

    Args:
        df_input: DataFrame with ``id``, ``polygon`` and the bounding-box
            columns.
        threshold: Overlap ratio above which two polygons are merged.
            Defaults to the previously hard-coded 0.40.

    Returns:
        ``(True, merged_df)`` if a merge happened, otherwise
        ``(False, df_input)``.
    """
    for i in range(len(df_input)):
        polygon = df_input.iloc[i]["polygon"]
        candidates = calc_overlapping_subset(df_input=df_input, index=i)

        # Collect ids instead of writing a flag into ``candidates``: that
        # frame is a filtered slice of ``df_input``, and assigning into it
        # triggers pandas' chained-assignment warning.
        ids_to_merge = []
        for _, other in candidates.iterrows():
            other_polygon = other["polygon"]
            overlaps_enough = (
                intersection_area_ratio(
                    polygon=polygon, polygon_comparison=other_polygon
                )
                > threshold
                or intersection_area_ratio(
                    polygon=other_polygon, polygon_comparison=polygon
                )
                > threshold
            )
            if overlaps_enough:
                ids_to_merge.append(other["id"])

        if ids_to_merge:
            merge_subset = candidates[candidates["id"].isin(ids_to_merge)]
            # Removing the merged rows is handled inside merge() as well.
            merged = merge(
                df_input=df_input, polygon_index=i, merge_subset=merge_subset
            )
            return True, merged

    return False, df_input
def process(list_df):
    """Concatenate the frames, drop contained polygons, then merge until stable.

    ``merge_overlapping`` is called repeatedly until it reports that no merge
    happened. The running count of merges is printed every 100 merges.
    """
    combined = remove_contained_poylgons(df_input=pd.concat(list_df))
    merge_count = 0
    while True:
        did_merge, combined = merge_overlapping(df_input=combined)
        if not did_merge:
            return combined
        merge_count += 1
        if merge_count % 100 == 0:
            print(merge_count)
def combine_different_tile_size(df_smaller, df_bigger, threshold=0.15):
    """Append the polygons of ``df_smaller`` that are not yet covered by ``df_bigger``.

    A polygon from ``df_smaller`` is added unless some polygon of
    ``df_bigger`` covers more than ``threshold`` of its area. Polygons of
    ``df_smaller`` are never compared with each other.

    Args:
        df_smaller: DataFrame with ``polygon`` and the bounding-box columns.
        df_bigger: DataFrame with the same columns; it is not modified.
        threshold: Covered share of a ``df_smaller`` polygon above which it
            is skipped. Defaults to the previously hard-coded 0.15.

    Returns:
        A copy of ``df_bigger`` followed by the accepted ``df_smaller`` rows.
    """
    accepted_positions = []
    for i in range(len(df_smaller)):
        row = df_smaller.iloc[i]
        polygon = row["polygon"]

        # Standard interval-overlap test on both axes. Two polygons can only
        # share area when their bounding boxes overlap this way, so the
        # prefilter no longer misses boxes that span, or exactly equal, the
        # range of the bigger polygon.
        lat_overlap = (row["min_lat"] < df_bigger["max_lat"]) & (
            row["max_lat"] > df_bigger["min_lat"]
        )
        lon_overlap = (row["min_lon"] < df_bigger["max_lon"]) & (
            row["max_lon"] > df_bigger["min_lon"]
        )
        candidates = df_bigger.loc[lat_overlap & lon_overlap, "polygon"]

        already_covered = any(
            intersection_area_ratio(polygon, comparison_polygon) > threshold
            for comparison_polygon in candidates
        )
        if not already_covered:
            accepted_positions.append(i)

    df_result = df_bigger.copy()
    if accepted_positions:
        # One concat for all accepted rows instead of one per row.
        df_result = pd.concat(
            [df_result, df_smaller.iloc[accepted_positions]], axis=0, join="outer"
        )
    return df_result
def clean(df, score_threashold=0.5):
    """Return the rows of ``df`` whose ``score`` is strictly above ``score_threashold``."""
    keep = df["score"] > score_threashold
    return df.loc[keep]
def row_to_feature(row):
    """Build a GeoJSON ``Feature`` dict from one row.

    Reads ``id``, ``Confidence_score`` and ``coordinates`` from ``row``; the
    coordinates are wrapped in a list and emitted as a ``Polygon`` geometry.
    """
    feature = {"id": row["id"], "type": "Feature"}
    feature["properties"] = {"Confidence_score": row["Confidence_score"]}
    feature["geometry"] = {"type": "Polygon", "coordinates": [row["coordinates"]]}
    return feature
def export_df_as_geojson(df, filename="output"):
    """Write the rows of ``df`` to ``<filename>.geojson`` as a FeatureCollection.

    Each row is converted with ``row_to_feature``. The collection carries a
    fixed CRS entry naming ``urn:ogc:def:crs:EPSG::32720``. A confirmation
    message is printed after writing.
    """
    features = []
    for _, row in df.iterrows():
        features.append(row_to_feature(row))

    crs = {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::32720"}}
    feature_collection = {
        "type": "FeatureCollection",
        "crs": crs,
        "features": features,
    }

    # Serialize before opening, so a serialization error leaves no file behind.
    serialized = json.dumps(feature_collection)
    with open(f"{filename}.geojson", "w") as handle:
        handle.write(serialized)
    print(f"GeoJSON data exported to '{filename}.geojson' file.")
def convert_id_to_string(prefix, x):
    """Return ``prefix`` followed by the string form of ``x``."""
    suffix = str(x)
    return prefix + suffix
def postprocess(prediction_geojson_path, store_path):
    """Load predictions from GeoJSON, de-duplicate them and export the result.

    The file is turned into a DataFrame, each row gets an id of the form
    ``df_<index>``, ``Confidence_score`` is cast to float, and the
    ``to_drop`` / ``to_merge`` flag columns are initialized to False. The
    frame then goes through ``process`` and is written with
    ``export_df_as_geojson``.

    Args:
        prediction_geojson_path: Path of the GeoJSON file to read.
        store_path: Output path passed to ``export_df_as_geojson`` as
            ``filename`` (that function appends ``.geojson``).

    Returns:
        The processed DataFrame.
    """
    # GeoJSON is UTF-8 encoded; do not rely on the platform default encoding.
    with open(prediction_geojson_path, "r", encoding="utf-8") as file:
        prediction_data = json.load(file)

    df = turn_into_dataframe(prediction_data)
    df["id"] = [convert_id_to_string("df_", position) for position in df.index]
    df["Confidence_score"] = df["Confidence_score"].astype(float)
    df["to_drop"] = False
    df["to_merge"] = False

    df_res = process([df])
    export_df_as_geojson(df=df_res, filename=store_path)
    return df_res