adityajain07 committed on
Commit
0db7cc1
·
verified ·
1 Parent(s): 3863a88

Delete prepare_gbif_checklist.py

Browse files
Files changed (1) hide show
  1. prepare_gbif_checklist.py +0 -69
prepare_gbif_checklist.py DELETED
@@ -1,69 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- """Prepare the GBIF checklist for the global moth model"""
5
-
6
- import os
7
- from pathlib import Path
8
-
9
- # System packages
10
- import pandas as pd
11
-
12
- # 3rd party packages
13
- from dotenv import load_dotenv
14
-
15
- # Load secrets and config from optional .env file
16
- load_dotenv()
17
-
18
-
19
def remove_non_species_taxon(checklist: pd.DataFrame) -> pd.DataFrame:
    """
    Remove all non-species taxa from the checklist

    Args:
        checklist: Checklist table; must contain a "taxonRank" column.

    Returns:
        A new DataFrame holding only the rows whose "taxonRank" is exactly
        "SPECIES". Row index labels are preserved and the input frame is
        left unmodified.

    Raises:
        KeyError: If the checklist has no "taxonRank" column.
    """

    # Keep only rows where the taxa rank is "SPECIES"
    is_species = checklist["taxonRank"] == "SPECIES"

    # Return an explicit copy so callers can add or modify columns on the
    # result without a SettingWithCopyWarning and without any ambiguity about
    # whether the original checklist is affected.
    return checklist.loc[is_species].copy()
28
-
29
-
30
def remove_butterflies(checklist: pd.DataFrame) -> pd.DataFrame:
    """
    Remove all butterflies from the checklist

    Args:
        checklist: Checklist table; must contain a "family" column.

    Returns:
        A new DataFrame without the rows whose "family" is one of the
        butterfly families listed below. Matching is exact (case-sensitive),
        so rows with a missing or differently spelled family are kept. Row
        index labels are preserved and the input frame is left unmodified.

    Raises:
        KeyError: If the checklist has no "family" column.
    """

    # List of butterfly families
    butterfly_fm = [
        "Hesperiidae",
        "Lycaenidae",
        "Nymphalidae",
        "Papilionidae",
        "Pieridae",
        "Riodinidae",
        "Hedylidae",
    ]

    # Remove butterfly families
    is_butterfly = checklist["family"].isin(butterfly_fm)

    # Return an explicit copy so callers can add or modify columns on the
    # result without a SettingWithCopyWarning and without any ambiguity about
    # whether the original checklist is affected.
    return checklist.loc[~is_butterfly].copy()
50
-
51
-
52
if __name__ == "__main__":
    # Directory holding the checklist CSVs; read from the environment
    # (optionally populated from a .env file by load_dotenv() above).
    GLOBAL_MODEL_DIR = os.getenv("GLOBAL_MODEL_DIR")
    if GLOBAL_MODEL_DIR is None:
        # Without this guard Path(None) fails with an opaque TypeError.
        raise SystemExit(
            "GLOBAL_MODEL_DIR is not set; export it or define it in a .env "
            "file before running this script."
        )
    model_dir = Path(GLOBAL_MODEL_DIR)

    # Remove non-species taxa
    checklist = "gbif_leps_checklist_07242024_original.csv"
    checklist_pd = pd.read_csv(model_dir / checklist)
    leps_checklist_pd = remove_non_species_taxon(checklist_pd)
    leps_checklist_pd.to_csv(
        model_dir / "gbif_leps_checklist_07242024.csv", index=False
    )

    # Remove butterflies
    # The species-only file written above is read back in, so this step
    # operates on exactly what was persisted to disk.
    checklist = "gbif_leps_checklist_07242024.csv"
    checklist_pd = pd.read_csv(model_dir / checklist)
    moth_checklist_pd = remove_butterflies(checklist_pd)
    moth_checklist_pd.to_csv(
        model_dir / "gbif_moth_checklist_07242024.csv", index=False
    )