# Musterdatenkatalog/utils/add_missing.py
# Commit 4eea983 (verified): "Refactored Musterdatenkatalog with APP (#1)"
import pandas as pd
import os
# All CSV files used by this script live in one directory; the paths are
# relative, so they resolve against the current working directory.
_DATA_DIR = "data"

# Rows that supply replacement "Geometry" values, keyed by "Kommune".
MISSING_ENRICHED = os.path.join(_DATA_DIR, "missing_enriched.csv")
# Input city table whose "Geometry" values get patched.
CITIES_ENRICHED = os.path.join(_DATA_DIR, "cities_enriched_final.csv")
# Output file that receives the patched city table.
CITIES_ENRICHED_MANUALLY = os.path.join(_DATA_DIR, "cities_enriched_manually.csv")
def merge_missing_coord(df, missing):
    """Overwrite ``Geometry`` in *df* with the values supplied by *missing*.

    Rows are matched on the ``Kommune`` column. When *missing* lists the same
    ``Kommune`` more than once, the first occurrence wins. Rows of *df* whose
    ``Kommune`` does not appear in *missing* are left untouched.

    Args:
        df: DataFrame with ``Kommune`` and ``Geometry`` columns. It is
            modified in place.
        missing: DataFrame with ``Kommune`` and ``Geometry`` columns holding
            the replacement values.

    Returns:
        The same *df* object, after the update.
    """
    # Build the Kommune -> Geometry lookup once instead of rescanning
    # `missing` for every row of `df`. Rows without a Kommune can never match,
    # and keep="first" mirrors taking the first matching row.
    lookup = (
        missing.dropna(subset=["Kommune"])
        .drop_duplicates(subset="Kommune", keep="first")
        .set_index("Kommune")["Geometry"]
    )

    matched = df["Kommune"].isin(lookup.index)
    # Skip the assignment entirely when nothing matches so the column is not
    # touched at all in that case.
    if matched.any():
        # .to_numpy() makes the assignment positional, so it does not depend
        # on the labels of df's index.
        df.loc[matched, "Geometry"] = (
            df.loc[matched, "Kommune"].map(lookup).to_numpy()
        )
    return df
# Script body: patch the city table with the manually enriched coordinates,
# write the result, and report which rows still have no geometry.
cities = pd.read_csv(CITIES_ENRICHED)
# Remove the "_1" column when the input file carries one.
if "_1" in cities.columns:
    cities = cities.drop(columns=["_1"])

# Merge into the cleaned frame loaded above. Re-reading the CSV here would
# bring the "_1" column back and write it to the output file.
cities_final = merge_missing_coord(cities, pd.read_csv(MISSING_ENRICHED))
cities_final.to_csv(CITIES_ENRICHED_MANUALLY, index=False)

# A row still lacks coordinates when its Geometry is the literal "[]" or is
# absent. pandas reads empty CSV cells as NaN, never as None, so the check
# has to use isna() to catch them.
geometry = cities_final["Geometry"]
still_missing = cities_final[geometry.isna() | (geometry == "[]")]
print(
    f"There are still missing {len(still_missing)} coordinates. The rows are: {still_missing}"
)
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
cities_final.info()