{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/miniconda3/envs/hackathon/lib/python3.13/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning\n", " warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')\n" ] } ], "source": [ "import pandas as pd\n", "from fuzzywuzzy import process\n", "from typing import List\n", "\n", "\n", "def get_popular_agriculture(\n", " df: pd.DataFrame,\n", " region: str,\n", " surface_parc: str = None,\n", " groups_agri: List[str] = None,\n", "): # surface en hectare\n", " unique_regions = df[\"REGION\"].unique().tolist()\n", " region, _ = process.extractOne(region, unique_regions)\n", " df = df[df[\"REGION\"] == region]\n", " if groups_agri != None:\n", " unique_agri_group = df[\"LIBELLE_GROUPE_CULTURE\"].unique().tolist()\n", " groups_agri_new = []\n", " for group_culture in groups_agri:\n", " groups_agri_new.append(process.extractOne(group_culture, unique_agri_group)[0])\n", " df = df[df[\"LIBELLE_GROUPE_CULTURE\"].isin(groups_agri_new)]\n", " if surface_parc != None:\n", " popular_cultures = df[\"LIBELLE_CULTURE\"].value_counts().head(10).index\n", " df = df[df[\"LIBELLE_CULTURE\"].isin(popular_cultures)]\n", " df[\"proximity\"] = abs(df[\"SURF_PARC\"] - surface_parc)\n", " sorted_df = df.sort_values(by=\"proximity\").drop(columns=\"proximity\")\n", " return popular_cultures, sorted_df[[\"LIBELLE_CULTURE\",\"LIBELLE_GROUPE_CULTURE\",\"SURF_PARC\"]]\n", " popular_cultures = df[\"LIBELLE_CULTURE\"].value_counts().head(10).index\n", " df = df[df[\"LIBELLE_CULTURE\"].isin(popular_cultures)]\n", " return popular_cultures, df[[\"LIBELLE_CULTURE\",\"LIBELLE_GROUPE_CULTURE\",\"SURF_PARC\"]]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LIBELLE_CULTURELIBELLE_GROUPE_CULTURESURF_PARC
26Autre plante fourragère annuelle (ni légumineu...Fourrage118.12
49AïlLégumes ou fleurs49.67
61Plantes médicinales et à parfum non pérennes (...Autres cultures industrielles29.14
78Plante aromatique pérenne non arbustive ou arb...Autres cultures industrielles20.14
14Autre culture pérenne et jachère dans les bana...Divers5.31
37AgrumeVergers3.18
2Plantes aromatiques herbacées non pérennes (< ...Autres cultures industrielles277.71
117Avoine de printempsAutres céréales877.02
91ArtichautLégumes ou fleurs2953.38
104Avoine d’hiverAutres céréales5176.17
\n", "
" ], "text/plain": [ " LIBELLE_CULTURE \\\n", "26 Autre plante fourragère annuelle (ni légumineu... \n", "49 Aïl \n", "61 Plantes médicinales et à parfum non pérennes (... \n", "78 Plante aromatique pérenne non arbustive ou arb... \n", "14 Autre culture pérenne et jachère dans les bana... \n", "37 Agrume \n", "2 Plantes aromatiques herbacées non pérennes (< ... \n", "117 Avoine de printemps \n", "91 Artichaut \n", "104 Avoine d’hiver \n", "\n", " LIBELLE_GROUPE_CULTURE SURF_PARC \n", "26 Fourrage 118.12 \n", "49 Légumes ou fleurs 49.67 \n", "61 Autres cultures industrielles 29.14 \n", "78 Autres cultures industrielles 20.14 \n", "14 Divers 5.31 \n", "37 Vergers 3.18 \n", "2 Autres cultures industrielles 277.71 \n", "117 Autres céréales 877.02 \n", "91 Légumes ou fleurs 2953.38 \n", "104 Autres céréales 5176.17 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(\"./data_rpg/data_prepared_rpg.csv\")\n", "\n", "pop, sorted_df = get_popular_agriculture(\n", " df,\n", " \"Bretagne\",\n", " 132.5\n", " \n", ")\n", "sorted_df" ] } ], "metadata": { "kernelspec": { "display_name": "hackathon", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.2" } }, "nbformat": 4, "nbformat_minor": 2 }