File size: 2,366 Bytes
b152d54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import json
import os
import re

import requests
from bs4 import BeautifulSoup

# eBird region code; interpolated into the species-list (spplist) request URL.
regionCode = 'CO-ANT'
# eBird API token sent in the `x-ebirdapitoken` request header. Supply it via
# the EBIRD_API_KEY environment variable; the literal below is kept only as a
# backward-compatible fallback.
# NOTE(review): a token committed to source should be treated as leaked --
# rotate it and drop the fallback once the environment variable is in use.
ebird_api_key = os.environ.get('EBIRD_API_KEY', 'dj45pbobk3g5')


def get_region_species_list(timeout=30):
    """Fetch the species list for the configured region from the eBird API.

    Sends a GET request to the ``product/spplist`` endpoint for the
    module-level ``regionCode``, passing ``ebird_api_key`` in the
    ``x-ebirdapitoken`` header.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON body when the server answers with HTTP 200
        (presumably a list of species codes -- the caller iterates over it),
        or ``None`` when the request fails, the status is not 200, or the
        body is not valid JSON.
    """
    url = f'https://api.ebird.org/v2/product/spplist/{regionCode}'
    headers = {'x-ebirdapitoken': ebird_api_key}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same "None on failure"
        # contract as a non-200 status instead of aborting the script.
        print('error requesting species list: ', exc)
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # The body was not valid JSON.
        return None
    
def get_species_name(taxonCode, timeout=30):
    """Look up the scientific name of a taxon in the eBird taxonomy.

    Args:
        taxonCode: Species code placed in the ``species`` query parameter of
            the ``ref/taxonomy/ebird`` endpoint.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``sciName`` field of the first entry in the JSON response, or
        ``None`` when the request fails, the status is not 200, or the
        response does not contain such an entry.
    """
    url = f'https://api.ebird.org/v2/ref/taxonomy/ebird?species={taxonCode}&fmt=json'
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Keep a long scraping run alive when a single lookup fails.
        print('error requesting taxonomy: ', exc)
        return None

    if response.status_code != 200:
        return None

    try:
        data = response.json()
        return data[0]['sciName']
    except (ValueError, LookupError, TypeError):
        # Invalid JSON, an empty result list, or an entry without 'sciName'.
        return None

def get_image_url(regionCode, taxonCode, timeout=30):
    """Find a photo asset URL for a taxon by scraping the Macaulay Library catalog.

    Requests the catalog search page for ``taxonCode`` (photos, sorted with
    ``rating_rank_desc``), searches the page's ``<script>`` tags for the first
    ``assetId:<digits>,`` occurrence and builds a CDN asset URL from it.

    Args:
        regionCode: Currently unused -- the catalog query is not filtered by
            region. Kept so existing callers keep working.
        taxonCode: Species code placed in the ``taxonCode`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The asset URL string (also printed to stdout), or ``None`` when the
        request fails, the status is not 200, or no asset id is found.
    """
    url = f"https://search.macaulaylibrary.org/catalog?taxonCode={taxonCode}&sort=rating_rank_desc&mediaType=photo"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Keep a long scraping run alive when a single page fails to load.
        print('error requesting catalog page: ', exc)
        return None

    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    scripts = soup.find_all('script')
    # The asset ids are embedded in inline script text, so the regex is run
    # over the stringified list of all <script> tags.
    match = re.search(r'assetId:(\d+),', str(scripts))
    if not match:
        return None

    asset_id = match.group(1)
    enlace = f"https://cdn.download.ams.birds.cornell.edu/api/v1/asset/{asset_id}"
    print(enlace)
    return enlace


# Driver: fetch every species code for the region, resolve its scientific name
# and a photo URL, and dump the collected records to species.json.
species = []

species_codes = get_region_species_list()
if species_codes is None:
    # The list request failed; stop with a clear message before touching
    # species.json (previously this surfaced as a TypeError from len(None)).
    raise SystemExit(f'could not fetch species list for region {regionCode}')

total = len(species_codes)
print('len of species codes: ', total)
for ind, species_code in enumerate(species_codes, start=1):
    print(ind, 'of', total)

    species_name = get_species_name(species_code)
    if not species_name:
        # Report a failed name lookup (the check used to test species_code,
        # which is never empty here, so failures went unreported).
        print('error with: ', species_code)

    image_url = get_image_url(regionCode, species_code)
    if not image_url:
        print('error with image url: ', species_code)

    # Records are kept even when the name or image lookup failed; the
    # missing field is stored as None (null in the JSON output).
    species.append({'code': species_code, 'name': species_name, 'image_url': image_url})

# save to json:

with open('species.json', 'w', encoding='utf-8') as f:
    json.dump(species, f)

print('len of species: ', len(species))