Akram Sanad committed on
Commit
7a4e6e5
·
1 Parent(s): b23cefb

Useful functions for working with RPG data

Browse files
Files changed (1) hide show
  1. rpg_utils.ipynb +340 -0
rpg_utils.ipynb ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 69,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "from fuzzywuzzy import process\n",
11
+ "from typing import List\n",
12
+ "\n",
13
+ "\n",
14
+ "def get_popular_agriculture(\n",
15
+ " df: pd.DataFrame,\n",
16
+ " region: str,\n",
17
+ " surface_parc: str = None,\n",
18
+ " groups_agri: List[str] = None,\n",
19
+ "): # surface in hectares\n",
20
+ " unique_regions = df[\"REGION\"].unique().tolist()\n",
21
+ " region, _ = process.extractOne(region, unique_regions)\n",
22
+ " df = df[df[\"REGION\"] == region]\n",
23
+ " if groups_agri != None:\n",
24
+ " unique_agri_group = df[\"LIBELLE_GROUPE_CULTURE\"].unique().tolist()\n",
25
+ " groups_agri_new = []\n",
26
+ " for group_culture in groups_agri:\n",
27
+ " groups_agri_new.append(process.extractOne(group_culture, unique_agri_group)[0])\n",
28
+ " df = df[df[\"LIBELLE_GROUPE_CULTURE\"].isin(groups_agri_new)]\n",
29
+ " if surface_parc != None:\n",
30
+ " popular_cultures = df[\"LIBELLE_CULTURE\"].value_counts().head(10).index\n",
31
+ " df = df[df[\"LIBELLE_CULTURE\"].isin(popular_cultures)]\n",
32
+ " df[\"proximity\"] = abs(df[\"SURF_PARC\"] - surface_parc)\n",
33
+ " sorted_df = df.sort_values(by=\"proximity\").drop(columns=\"proximity\")\n",
34
+ " return popular_cultures, sorted_df[[\"LIBELLE_CULTURE\",\"LIBELLE_GROUPE_CULTURE\",\"SURF_PARC\"]]\n",
35
+ " popular_cultures = df[\"LIBELLE_CULTURE\"].value_counts().head(10).index\n",
36
+ " df = df[df[\"LIBELLE_CULTURE\"].isin(popular_cultures)]\n",
37
+ " return popular_cultures, df[[\"LIBELLE_CULTURE\",\"LIBELLE_GROUPE_CULTURE\",\"SURF_PARC\"]]"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 72,
43
+ "metadata": {},
44
+ "outputs": [],
45
+ "source": [
46
+ "df = pd.read_csv(\"./data_rpg/data_prepared_rpg.csv\")\n",
47
+ "\n",
48
+ "pop, sorted_df = get_popular_agriculture(\n",
49
+ " df,\n",
50
+ " \"Bretagne\",\n",
51
+ " 132.5\n",
52
+ "\n",
53
+ ")"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 73,
59
+ "metadata": {},
60
+ "outputs": [
61
+ {
62
+ "data": {
63
+ "text/html": [
64
+ "<div>\n",
65
+ "<style scoped>\n",
66
+ " .dataframe tbody tr th:only-of-type {\n",
67
+ " vertical-align: middle;\n",
68
+ " }\n",
69
+ "\n",
70
+ " .dataframe tbody tr th {\n",
71
+ " vertical-align: top;\n",
72
+ " }\n",
73
+ "\n",
74
+ " .dataframe thead th {\n",
75
+ " text-align: right;\n",
76
+ " }\n",
77
+ "</style>\n",
78
+ "<table border=\"1\" class=\"dataframe\">\n",
79
+ " <thead>\n",
80
+ " <tr style=\"text-align: right;\">\n",
81
+ " <th></th>\n",
82
+ " <th>LIBELLE_CULTURE</th>\n",
83
+ " <th>LIBELLE_GROUPE_CULTURE</th>\n",
84
+ " <th>SURF_PARC</th>\n",
85
+ " </tr>\n",
86
+ " </thead>\n",
87
+ " <tbody>\n",
88
+ " <tr>\n",
89
+ " <th>26</th>\n",
90
+ " <td>Autre plante fourragère annuelle (ni légumineu...</td>\n",
91
+ " <td>Fourrage</td>\n",
92
+ " <td>118.12</td>\n",
93
+ " </tr>\n",
94
+ " <tr>\n",
95
+ " <th>49</th>\n",
96
+ " <td>Aïl</td>\n",
97
+ " <td>Légumes ou fleurs</td>\n",
98
+ " <td>49.67</td>\n",
99
+ " </tr>\n",
100
+ " <tr>\n",
101
+ " <th>61</th>\n",
102
+ " <td>Plantes médicinales et à parfum non pérennes (...</td>\n",
103
+ " <td>Autres cultures industrielles</td>\n",
104
+ " <td>29.14</td>\n",
105
+ " </tr>\n",
106
+ " <tr>\n",
107
+ " <th>78</th>\n",
108
+ " <td>Plante aromatique pérenne non arbustive ou arb...</td>\n",
109
+ " <td>Autres cultures industrielles</td>\n",
110
+ " <td>20.14</td>\n",
111
+ " </tr>\n",
112
+ " <tr>\n",
113
+ " <th>14</th>\n",
114
+ " <td>Autre culture pérenne et jachère dans les bana...</td>\n",
115
+ " <td>Divers</td>\n",
116
+ " <td>5.31</td>\n",
117
+ " </tr>\n",
118
+ " <tr>\n",
119
+ " <th>37</th>\n",
120
+ " <td>Agrume</td>\n",
121
+ " <td>Vergers</td>\n",
122
+ " <td>3.18</td>\n",
123
+ " </tr>\n",
124
+ " <tr>\n",
125
+ " <th>2</th>\n",
126
+ " <td>Plantes aromatiques herbacées non pérennes (&lt; ...</td>\n",
127
+ " <td>Autres cultures industrielles</td>\n",
128
+ " <td>277.71</td>\n",
129
+ " </tr>\n",
130
+ " <tr>\n",
131
+ " <th>117</th>\n",
132
+ " <td>Avoine de printemps</td>\n",
133
+ " <td>Autres céréales</td>\n",
134
+ " <td>877.02</td>\n",
135
+ " </tr>\n",
136
+ " <tr>\n",
137
+ " <th>91</th>\n",
138
+ " <td>Artichaut</td>\n",
139
+ " <td>Légumes ou fleurs</td>\n",
140
+ " <td>2953.38</td>\n",
141
+ " </tr>\n",
142
+ " <tr>\n",
143
+ " <th>104</th>\n",
144
+ " <td>Avoine d’hiver</td>\n",
145
+ " <td>Autres céréales</td>\n",
146
+ " <td>5176.17</td>\n",
147
+ " </tr>\n",
148
+ " </tbody>\n",
149
+ "</table>\n",
150
+ "</div>"
151
+ ],
152
+ "text/plain": [
153
+ " LIBELLE_CULTURE \\\n",
154
+ "26 Autre plante fourragère annuelle (ni légumineu... \n",
155
+ "49 Aïl \n",
156
+ "61 Plantes médicinales et à parfum non pérennes (... \n",
157
+ "78 Plante aromatique pérenne non arbustive ou arb... \n",
158
+ "14 Autre culture pérenne et jachère dans les bana... \n",
159
+ "37 Agrume \n",
160
+ "2 Plantes aromatiques herbacées non pérennes (< ... \n",
161
+ "117 Avoine de printemps \n",
162
+ "91 Artichaut \n",
163
+ "104 Avoine d’hiver \n",
164
+ "\n",
165
+ " LIBELLE_GROUPE_CULTURE SURF_PARC \n",
166
+ "26 Fourrage 118.12 \n",
167
+ "49 Légumes ou fleurs 49.67 \n",
168
+ "61 Autres cultures industrielles 29.14 \n",
169
+ "78 Autres cultures industrielles 20.14 \n",
170
+ "14 Divers 5.31 \n",
171
+ "37 Vergers 3.18 \n",
172
+ "2 Autres cultures industrielles 277.71 \n",
173
+ "117 Autres céréales 877.02 \n",
174
+ "91 Légumes ou fleurs 2953.38 \n",
175
+ "104 Autres céréales 5176.17 "
176
+ ]
177
+ },
178
+ "execution_count": 73,
179
+ "metadata": {},
180
+ "output_type": "execute_result"
181
+ }
182
+ ],
183
+ "source": [
184
+ "sorted_df"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "execution_count": 71,
190
+ "metadata": {},
191
+ "outputs": [
192
+ {
193
+ "data": {
194
+ "text/html": [
195
+ "<div>\n",
196
+ "<style scoped>\n",
197
+ " .dataframe tbody tr th:only-of-type {\n",
198
+ " vertical-align: middle;\n",
199
+ " }\n",
200
+ "\n",
201
+ " .dataframe tbody tr th {\n",
202
+ " vertical-align: top;\n",
203
+ " }\n",
204
+ "\n",
205
+ " .dataframe thead th {\n",
206
+ " text-align: right;\n",
207
+ " }\n",
208
+ "</style>\n",
209
+ "<table border=\"1\" class=\"dataframe\">\n",
210
+ " <thead>\n",
211
+ " <tr style=\"text-align: right;\">\n",
212
+ " <th></th>\n",
213
+ " <th>LIBELLE_CULTURE</th>\n",
214
+ " <th>LIBELLE_GROUPE_CULTURE</th>\n",
215
+ " <th>SURF_PARC</th>\n",
216
+ " </tr>\n",
217
+ " </thead>\n",
218
+ " <tbody>\n",
219
+ " <tr>\n",
220
+ " <th>2</th>\n",
221
+ " <td>Plantes aromatiques herbacées non pérennes (&lt; ...</td>\n",
222
+ " <td>Autres cultures industrielles</td>\n",
223
+ " <td>277.71</td>\n",
224
+ " </tr>\n",
225
+ " <tr>\n",
226
+ " <th>14</th>\n",
227
+ " <td>Autre culture pérenne et jachère dans les bana...</td>\n",
228
+ " <td>Divers</td>\n",
229
+ " <td>5.31</td>\n",
230
+ " </tr>\n",
231
+ " <tr>\n",
232
+ " <th>26</th>\n",
233
+ " <td>Autre plante fourragère annuelle (ni légumineu...</td>\n",
234
+ " <td>Fourrage</td>\n",
235
+ " <td>118.12</td>\n",
236
+ " </tr>\n",
237
+ " <tr>\n",
238
+ " <th>37</th>\n",
239
+ " <td>Agrume</td>\n",
240
+ " <td>Vergers</td>\n",
241
+ " <td>3.18</td>\n",
242
+ " </tr>\n",
243
+ " <tr>\n",
244
+ " <th>49</th>\n",
245
+ " <td>Aïl</td>\n",
246
+ " <td>Légumes ou fleurs</td>\n",
247
+ " <td>49.67</td>\n",
248
+ " </tr>\n",
249
+ " <tr>\n",
250
+ " <th>61</th>\n",
251
+ " <td>Plantes médicinales et à parfum non pérennes (...</td>\n",
252
+ " <td>Autres cultures industrielles</td>\n",
253
+ " <td>29.14</td>\n",
254
+ " </tr>\n",
255
+ " <tr>\n",
256
+ " <th>78</th>\n",
257
+ " <td>Plante aromatique pérenne non arbustive ou arb...</td>\n",
258
+ " <td>Autres cultures industrielles</td>\n",
259
+ " <td>20.14</td>\n",
260
+ " </tr>\n",
261
+ " <tr>\n",
262
+ " <th>91</th>\n",
263
+ " <td>Artichaut</td>\n",
264
+ " <td>Légumes ou fleurs</td>\n",
265
+ " <td>2953.38</td>\n",
266
+ " </tr>\n",
267
+ " <tr>\n",
268
+ " <th>104</th>\n",
269
+ " <td>Avoine d’hiver</td>\n",
270
+ " <td>Autres céréales</td>\n",
271
+ " <td>5176.17</td>\n",
272
+ " </tr>\n",
273
+ " <tr>\n",
274
+ " <th>117</th>\n",
275
+ " <td>Avoine de printemps</td>\n",
276
+ " <td>Autres céréales</td>\n",
277
+ " <td>877.02</td>\n",
278
+ " </tr>\n",
279
+ " </tbody>\n",
280
+ "</table>\n",
281
+ "</div>"
282
+ ],
283
+ "text/plain": [
284
+ " LIBELLE_CULTURE \\\n",
285
+ "2 Plantes aromatiques herbacées non pérennes (< ... \n",
286
+ "14 Autre culture pérenne et jachère dans les bana... \n",
287
+ "26 Autre plante fourragère annuelle (ni légumineu... \n",
288
+ "37 Agrume \n",
289
+ "49 Aïl \n",
290
+ "61 Plantes médicinales et à parfum non pérennes (... \n",
291
+ "78 Plante aromatique pérenne non arbustive ou arb... \n",
292
+ "91 Artichaut \n",
293
+ "104 Avoine d’hiver \n",
294
+ "117 Avoine de printemps \n",
295
+ "\n",
296
+ " LIBELLE_GROUPE_CULTURE SURF_PARC \n",
297
+ "2 Autres cultures industrielles 277.71 \n",
298
+ "14 Divers 5.31 \n",
299
+ "26 Fourrage 118.12 \n",
300
+ "37 Vergers 3.18 \n",
301
+ "49 Légumes ou fleurs 49.67 \n",
302
+ "61 Autres cultures industrielles 29.14 \n",
303
+ "78 Autres cultures industrielles 20.14 \n",
304
+ "91 Légumes ou fleurs 2953.38 \n",
305
+ "104 Autres céréales 5176.17 \n",
306
+ "117 Autres céréales 877.02 "
307
+ ]
308
+ },
309
+ "execution_count": 71,
310
+ "metadata": {},
311
+ "output_type": "execute_result"
312
+ }
313
+ ],
314
+ "source": [
315
+ "sorted_df"
316
+ ]
317
+ }
318
+ ],
319
+ "metadata": {
320
+ "kernelspec": {
321
+ "display_name": "hackathon",
322
+ "language": "python",
323
+ "name": "python3"
324
+ },
325
+ "language_info": {
326
+ "codemirror_mode": {
327
+ "name": "ipython",
328
+ "version": 3
329
+ },
330
+ "file_extension": ".py",
331
+ "mimetype": "text/x-python",
332
+ "name": "python",
333
+ "nbconvert_exporter": "python",
334
+ "pygments_lexer": "ipython3",
335
+ "version": "3.13.2"
336
+ }
337
+ },
338
+ "nbformat": 4,
339
+ "nbformat_minor": 2
340
+ }