{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/miniconda3/envs/hackathon/lib/python3.13/site-packages/fuzzywuzzy/fuzz.py:11: UserWarning: Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning\n",
      "  warnings.warn('Using slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning')\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from fuzzywuzzy import process\n",
    "from typing import List\n",
    "\n",
    "\n",
    "def get_popular_agriculture(\n",
    "    df: pd.DataFrame,\n",
    "    region: str,\n",
    "    surface_parc: str = None,\n",
    "    groups_agri: List[str] = None,\n",
    "):  # surface en hectare\n",
    "    unique_regions = df[\"REGION\"].unique().tolist()\n",
    "    region, _ = process.extractOne(region, unique_regions)\n",
    "    df = df[df[\"REGION\"] == region]\n",
    "    if groups_agri != None:\n",
    "        unique_agri_group = df[\"LIBELLE_GROUPE_CULTURE\"].unique().tolist()\n",
    "        groups_agri_new = []\n",
    "        for group_culture in groups_agri:\n",
    "            groups_agri_new.append(process.extractOne(group_culture, unique_agri_group)[0])\n",
    "        df = df[df[\"LIBELLE_GROUPE_CULTURE\"].isin(groups_agri_new)]\n",
    "    if surface_parc != None:\n",
    "        popular_cultures = df[\"LIBELLE_CULTURE\"].value_counts().head(10).index\n",
    "        df = df[df[\"LIBELLE_CULTURE\"].isin(popular_cultures)]\n",
    "        df[\"proximity\"] = abs(df[\"SURF_PARC\"] - surface_parc)\n",
    "        sorted_df = df.sort_values(by=\"proximity\").drop(columns=\"proximity\")\n",
    "        return popular_cultures, sorted_df[[\"LIBELLE_CULTURE\",\"LIBELLE_GROUPE_CULTURE\",\"SURF_PARC\"]]\n",
    "    popular_cultures = df[\"LIBELLE_CULTURE\"].value_counts().head(10).index\n",
    "    df = df[df[\"LIBELLE_CULTURE\"].isin(popular_cultures)]\n",
    "    return popular_cultures, df[[\"LIBELLE_CULTURE\",\"LIBELLE_GROUPE_CULTURE\",\"SURF_PARC\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>LIBELLE_CULTURE</th>\n",
       "      <th>LIBELLE_GROUPE_CULTURE</th>\n",
       "      <th>SURF_PARC</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>Autre plante fourragère annuelle (ni légumineu...</td>\n",
       "      <td>Fourrage</td>\n",
       "      <td>118.12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>Aïl</td>\n",
       "      <td>Légumes ou fleurs</td>\n",
       "      <td>49.67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>Plantes médicinales et à parfum non pérennes (...</td>\n",
       "      <td>Autres cultures industrielles</td>\n",
       "      <td>29.14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>78</th>\n",
       "      <td>Plante aromatique pérenne non arbustive ou arb...</td>\n",
       "      <td>Autres cultures industrielles</td>\n",
       "      <td>20.14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Autre culture pérenne et jachère dans les bana...</td>\n",
       "      <td>Divers</td>\n",
       "      <td>5.31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>Agrume</td>\n",
       "      <td>Vergers</td>\n",
       "      <td>3.18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Plantes aromatiques herbacées non pérennes (&lt; ...</td>\n",
       "      <td>Autres cultures industrielles</td>\n",
       "      <td>277.71</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>117</th>\n",
       "      <td>Avoine de printemps</td>\n",
       "      <td>Autres céréales</td>\n",
       "      <td>877.02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>Artichaut</td>\n",
       "      <td>Légumes ou fleurs</td>\n",
       "      <td>2953.38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>104</th>\n",
       "      <td>Avoine d’hiver</td>\n",
       "      <td>Autres céréales</td>\n",
       "      <td>5176.17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       LIBELLE_CULTURE  \\\n",
       "26   Autre plante fourragère annuelle (ni légumineu...   \n",
       "49                                                 Aïl   \n",
       "61   Plantes médicinales et à parfum non pérennes (...   \n",
       "78   Plante aromatique pérenne non arbustive ou arb...   \n",
       "14   Autre culture pérenne et jachère dans les bana...   \n",
       "37                                              Agrume   \n",
       "2    Plantes aromatiques herbacées non pérennes (< ...   \n",
       "117                                Avoine de printemps   \n",
       "91                                           Artichaut   \n",
       "104                                     Avoine d’hiver   \n",
       "\n",
       "            LIBELLE_GROUPE_CULTURE  SURF_PARC  \n",
       "26                        Fourrage     118.12  \n",
       "49               Légumes ou fleurs      49.67  \n",
       "61   Autres cultures industrielles      29.14  \n",
       "78   Autres cultures industrielles      20.14  \n",
       "14                          Divers       5.31  \n",
       "37                         Vergers       3.18  \n",
       "2    Autres cultures industrielles     277.71  \n",
       "117                Autres céréales     877.02  \n",
       "91               Légumes ou fleurs    2953.38  \n",
       "104                Autres céréales    5176.17  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(\"./data_rpg/data_prepared_rpg.csv\")\n",
    "\n",
    "pop, sorted_df = get_popular_agriculture(\n",
    "    df,\n",
    "    \"Bretagne\",\n",
    "    132.5\n",
    "    \n",
    ")\n",
    "sorted_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "hackathon",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}