Musterdatenkatalog / utils /add_missing.py
Rahka's picture
update scripts with path
ad87157 verified
from pathlib import Path
import pandas as pd
# Every CSV used by this script lives in the "data" folder that sits beside
# the directory containing this file.
_DATA_DIR = Path(__file__).parent.parent / "data"

# Input: rows whose "Geometry" values are copied into the city table.
MISSING_ENRICHED = _DATA_DIR / "missing_enriched.csv"
# Input: the city table that is being completed.
CITIES_ENRICHED = _DATA_DIR / "cities_enriched_final.csv"
# Output: the city table after the merge has been applied.
CITIES_ENRICHED_MANUALLY = _DATA_DIR / "cities_enriched_manually.csv"
def merge_missing_coord(df, missing):
    """Overwrite ``Geometry`` in *df* with values looked up in *missing*.

    For every row of *df* whose ``Kommune`` value also occurs in
    ``missing["Kommune"]``, the row's ``Geometry`` cell is replaced by the
    ``Geometry`` of the first row in *missing* with that ``Kommune``.
    Rows without a match are left untouched.

    Args:
        df: DataFrame with a ``Kommune`` column; modified in place.
        missing: DataFrame with ``Kommune`` and ``Geometry`` columns that
            supplies the replacement values.

    Returns:
        The same *df* object, after the in-place update.
    """
    # Build the Kommune -> Geometry lookup once instead of rescanning
    # ``missing`` for every row of ``df``.
    first_geometry = {}
    for kommune, geometry in zip(missing["Kommune"], missing["Geometry"]):
        # NaN is the only value that differs from itself; it can never match
        # a row by equality, so keep it out of the lookup.
        if kommune != kommune:
            continue
        # setdefault keeps the first occurrence when a Kommune is duplicated.
        first_geometry.setdefault(kommune, geometry)

    for row in df.itertuples():
        if row.Kommune in first_geometry:
            df.at[row.Index, "Geometry"] = first_geometry[row.Kommune]
    return df
# Load the city table once and reuse this frame for every later step.
cities = pd.read_csv(CITIES_ENRICHED)
# Drop the "_1" column when the CSV carries one so it is not written to the
# output file. NOTE(review): presumably an artifact of an earlier export --
# confirm where it comes from.
if "_1" in cities.columns:
    cities = cities.drop(columns=["_1"])

# Merge into the cleaned frame. Re-reading the CSV here would silently bring
# the "_1" column back and make the cleanup above a no-op.
cities_final = merge_missing_coord(cities, pd.read_csv(MISSING_ENRICHED))
cities_final.to_csv(CITIES_ENRICHED_MANUALLY, index=False)

# A coordinate counts as missing when the cell holds the literal "[]" or is
# empty. Empty CSV cells are loaded as NaN (not None), so use isna() to catch
# them.
geometry = cities_final["Geometry"]
still_missing = cities_final[geometry.isna() | (geometry == "[]")]
print(
    f"There are still missing {len(still_missing)} coordinates. The rows are: {still_missing}"
)
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
cities_final.info()