Akram Sanad committed on
Commit
1ae7d3d
·
1 Parent(s): 7a4e6e5

utils directory and rpg utils added

Browse files
Files changed (2) hide show
  1. rpg_utils.ipynb +2 -2
  2. utils/rpg_utils.py +29 -0
rpg_utils.ipynb CHANGED
@@ -39,7 +39,7 @@
39
  },
40
  {
41
  "cell_type": "code",
42
- "execution_count": 72,
43
  "metadata": {},
44
  "outputs": [],
45
  "source": [
@@ -49,7 +49,7 @@
49
  " df,\n",
50
  " \"Bretagne\",\n",
51
  " 132.5\n",
52
- "\n",
53
  ")"
54
  ]
55
  },
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": null,
43
  "metadata": {},
44
  "outputs": [],
45
  "source": [
 
49
  " df,\n",
50
  " \"Bretagne\",\n",
51
  " 132.5\n",
52
+ " \n",
53
  ")"
54
  ]
55
  },
utils/rpg_utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from fuzzywuzzy import process
3
+ from typing import List
4
+
5
+
6
def get_popular_agriculture(
    df: pd.DataFrame,
    region: str,
    surface_parc: float = None,
    groups_agri: List[str] = None,
    top_n: int = 10,
):
    """Return the most frequent cultures of a region and the matching rows.

    The region name (and each culture-group name, when given) is fuzzy-matched
    against the values present in ``df``, so approximate spellings are
    accepted. The frame is then restricted to the ``top_n`` most frequent
    ``LIBELLE_CULTURE`` values of the remaining rows.

    Args:
        df: Frame with at least the columns ``REGION``,
            ``LIBELLE_GROUPE_CULTURE``, ``LIBELLE_CULTURE`` and ``SURF_PARC``.
        region: Region name; the closest value of ``df["REGION"]`` is used.
        surface_parc: Optional target parcel surface, in hectares. When given,
            the returned rows are ordered by increasing absolute difference
            between ``SURF_PARC`` and this value.
        groups_agri: Optional culture-group names; each one is fuzzy-matched
            against ``LIBELLE_GROUPE_CULTURE`` and only rows belonging to a
            matched group are kept. An empty list keeps no rows.
        top_n: Number of most frequent cultures to keep (10 by default).

    Returns:
        A ``(popular_cultures, rows)`` tuple: the index of the ``top_n`` most
        frequent culture labels, and the rows of those cultures restricted to
        the columns ``LIBELLE_CULTURE``, ``LIBELLE_GROUPE_CULTURE`` and
        ``SURF_PARC``. Both are empty when nothing can be matched.
    """
    output_columns = ["LIBELLE_CULTURE", "LIBELLE_GROUPE_CULTURE", "SURF_PARC"]

    # Missing labels are not strings, so they are excluded from the candidates.
    region_candidates = df["REGION"].dropna().unique().tolist()
    region_match = process.extractOne(region, region_candidates)
    if region_match is None:
        # extractOne yields None when there is nothing to match against;
        # continue with an empty frame instead of failing on unpacking.
        df = df.iloc[0:0]
    else:
        df = df[df["REGION"] == region_match[0]]

    if groups_agri is not None:
        group_candidates = df["LIBELLE_GROUPE_CULTURE"].dropna().unique().tolist()
        group_matches = (
            process.extractOne(group_culture, group_candidates)
            for group_culture in groups_agri
        )
        matched_groups = [match[0] for match in group_matches if match is not None]
        df = df[df["LIBELLE_GROUPE_CULTURE"].isin(matched_groups)]

    popular_cultures = df["LIBELLE_CULTURE"].value_counts().head(top_n).index
    df = df[df["LIBELLE_CULTURE"].isin(popular_cultures)]

    if surface_parc is not None:
        # assign() builds a new frame, so the temporary sort key is never
        # written into a filtered slice of the caller's data.
        df = (
            df.assign(proximity=(df["SURF_PARC"] - surface_parc).abs())
            .sort_values(by="proximity")
            .drop(columns="proximity")
        )

    return popular_cultures, df[output_columns]