from typing import List, Optional, Tuple

import pandas as pd
from fuzzywuzzy import process


def _closest_label(query: str, candidates: List[str]) -> Optional[str]:
    """Return the candidate that best fuzzy-matches ``query``, or None if there is none."""
    if not candidates:
        return None
    match = process.extractOne(query, candidates)
    return None if match is None else match[0]


def get_popular_agriculture(
    df: pd.DataFrame,
    region: str,
    surface_parc: Optional[float] = None,
    groups_agri: Optional[List[str]] = None,
    top_n: int = 10,
) -> Tuple[pd.Index, pd.DataFrame]:
    """Find the most frequent cultures of a region and the parcels growing them.

    The requested region (and each requested culture group) does not have to
    be spelled exactly: it is resolved to the closest existing label of the
    corresponding column through fuzzy matching.

    Args:
        df: Parcel table. Must contain the columns ``REGION``,
            ``LIBELLE_GROUPE_CULTURE``, ``LIBELLE_CULTURE`` and ``SURF_PARC``.
            The caller's frame is never modified.
        region: Region name, matched approximately against ``df["REGION"]``.
        surface_parc: Optional target parcel surface (in hectares, per the
            original author's note). When given, the returned rows are ordered
            by increasing absolute difference between ``SURF_PARC`` and it.
        groups_agri: Optional culture-group names, each matched approximately
            against the ``LIBELLE_GROUPE_CULTURE`` values found in the region.
            An empty list selects nothing.
        top_n: How many of the most frequent cultures to keep (default 10).

    Returns:
        A tuple ``(popular_cultures, parcels)`` where ``popular_cultures`` is
        the index of the ``top_n`` most frequent ``LIBELLE_CULTURE`` values
        after filtering, and ``parcels`` holds the rows of those cultures
        restricted to ``LIBELLE_CULTURE``, ``LIBELLE_GROUPE_CULTURE`` and
        ``SURF_PARC``. Both are empty when nothing matches.
    """
    result_columns = ["LIBELLE_CULTURE", "LIBELLE_GROUPE_CULTURE", "SURF_PARC"]

    # Drop missing labels so that only real values are offered as candidates.
    known_regions = df["REGION"].dropna().unique().tolist()
    matched_region = _closest_label(region, known_regions)
    if matched_region is None:
        # No region to match against: continue with an empty selection
        # instead of failing while unpacking a missing match.
        df = df.iloc[0:0]
    else:
        df = df[df["REGION"] == matched_region]

    if groups_agri is not None:
        known_groups = df["LIBELLE_GROUPE_CULTURE"].dropna().unique().tolist()
        resolved = (_closest_label(group, known_groups) for group in groups_agri)
        matched_groups = [label for label in resolved if label is not None]
        df = df[df["LIBELLE_GROUPE_CULTURE"].isin(matched_groups)]

    popular_cultures = df["LIBELLE_CULTURE"].value_counts().head(top_n).index
    df = df[df["LIBELLE_CULTURE"].isin(popular_cultures)]

    if surface_parc is not None:
        # `assign` builds a new frame, so the temporary sort key is never
        # written into a filtered slice of the caller's data.
        df = (
            df.assign(proximity=(df["SURF_PARC"] - surface_parc).abs())
            .sort_values(by="proximity")
            .drop(columns="proximity")
        )

    return popular_cultures, df[result_columns]