# colombia_birds_quiz / create_dbs.py
# te's picture
# first commit
# b152d54
import json
import os
import re

import requests
from bs4 import BeautifulSoup
# eBird region code whose species list is downloaded; it is interpolated into
# the species-list URL and passed to get_image_url().
regionCode = 'CO-ANT'
# eBird API token, sent as the 'x-ebirdapitoken' request header.
# Prefer supplying it through the EBIRD_API_KEY environment variable; the
# literal below is kept only as a backward-compatible fallback.
# NOTE(review): a credential committed to source control should be rotated
# and the fallback removed once callers set the environment variable.
ebird_api_key = os.environ.get('EBIRD_API_KEY') or 'dj45pbobk3g5'
def get_region_species_list(timeout=30):
    """Fetch the eBird species list for the module-level ``regionCode``.

    Sends a GET request to the eBird ``product/spplist`` endpoint,
    authenticating with the module-level ``ebird_api_key`` in the
    ``x-ebirdapitoken`` header.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON payload when the server answers HTTP 200 (the caller
        iterates it as a sequence of species codes). ``None`` when the
        request fails, times out, returns any other status, or the body is
        not valid JSON.
    """
    url = f'https://api.ebird.org/v2/product/spplist/{regionCode}'
    headers = {'x-ebirdapitoken': ebird_api_key}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return None
        return response.json()
    except (requests.RequestException, ValueError):
        # Network failure or undecodable body: report it the same way as a
        # non-200 status so the caller only has one failure signal to check.
        return None
def get_species_name(taxonCode, timeout=30):
    """Look up the scientific name for an eBird taxon code.

    Queries the eBird taxonomy endpoint with ``species=<taxonCode>`` and
    ``fmt=json`` and reads the ``sciName`` field of the first returned entry.

    Args:
        taxonCode: eBird species/taxon code to look up.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``sciName`` value of the first entry, or ``None`` when the request
        fails, the status is not 200, the body is not valid JSON, or the
        payload has no usable first entry.
    """
    url = 'https://api.ebird.org/v2/ref/taxonomy/ebird'
    # Passing the query via ``params`` lets requests URL-encode the code.
    params = {'species': taxonCode, 'fmt': 'json'}
    try:
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code != 200:
            return None
        data = response.json()
    except (requests.RequestException, ValueError):
        return None

    try:
        return data[0]['sciName']
    except (IndexError, KeyError, TypeError):
        # Empty list, entry without 'sciName', or an unexpected payload shape.
        return None
def get_image_url(regionCode, taxonCode, timeout=30):
    """Return a Macaulay Library asset URL for a photo of the given taxon.

    Downloads the Macaulay Library catalog search page for ``taxonCode``
    (photos, sorted by ``rating_rank_desc``), scans the page's ``<script>``
    tags for the first ``assetId:<digits>,`` occurrence and builds the CDN
    asset URL from it. The URL is also printed to stdout.

    Args:
        regionCode: Accepted for interface compatibility but not used; the
            search is not restricted to a region.
        taxonCode: eBird species/taxon code to search for.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The asset URL string, or ``None`` when the request fails, the status
        is not 200, or no asset id is found in the page.
    """
    url = 'https://search.macaulaylibrary.org/catalog'
    params = {
        'taxonCode': taxonCode,
        'sort': 'rating_rank_desc',
        'mediaType': 'photo',
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    scripts = soup.find_all('script')
    # The asset ids are embedded in inline JavaScript, so search the
    # stringified script tags rather than the parsed DOM.
    match = re.search(r'assetId:(\d+),', str(scripts))
    if match is None:
        return None

    asset_id = match.group(1)
    enlace = f"https://cdn.download.ams.birds.cornell.edu/api/v1/asset/{asset_id}"
    print(enlace)
    return enlace
# Build species.json: one record (code, scientific name, image URL) for every
# species code returned for the configured region.
species = []
species_codes = get_region_species_list()
if species_codes is None:
    # The list request failed. Check this before calling len(), which would
    # raise TypeError on None, and leave any existing species.json untouched.
    print('error: could not fetch the species list for', regionCode)
else:
    total = len(species_codes)
    print('len of species codes: ', total)
    for ind, species_code in enumerate(species_codes, start=1):
        print(ind, 'of', total)
        species_name = get_species_name(species_code)
        # Report a failed name lookup (test the lookup result, not the code).
        if not species_name:
            print('error with: ', species_code)
        image_url = get_image_url(regionCode, species_code)
        if not image_url:
            print('error with image url: ', species_code)
        # Records are kept even when a lookup failed; the missing field is
        # stored as None (null in the JSON output).
        species.append({'code': species_code, 'name': species_name, 'image_url': image_url})
    # save to json:
    with open('species.json', 'w') as f:
        json.dump(species, f)
    print('len of species: ', len(species))