"""Build ``species.json`` for every species on an eBird region's species list.

For each species code returned for ``regionCode`` the script looks up the
scientific name in the eBird taxonomy and scrapes the Macaulay Library
catalog page for the asset id of the first photo in the rating-sorted search,
then stores ``{'code', 'name', 'image_url'}`` records as a JSON list.
"""
import json
import os
import re
from typing import List, Optional

import requests
from bs4 import BeautifulSoup

regionCode = 'CO-ANT'
# NOTE(review): this key is committed in source and should be treated as
# leaked. Set EBIRD_API_KEY in the environment to override it; the literal is
# kept only as a fallback so existing invocations keep working.
ebird_api_key = os.environ.get('EBIRD_API_KEY', 'dj45pbobk3g5')

# Seconds to wait on any single HTTP request before giving up; without a
# timeout one stalled connection would hang the whole run.
REQUEST_TIMEOUT = 30

# Matches the first ``assetId:<digits>,`` occurrence in the catalog page's
# inline scripts. Compiled once because it is used for every species.
_ASSET_ID_RE = re.compile(r'assetId:(\d+),')


def _fetch(url, headers=None):
    """Return the response for ``url`` if it answered 200, otherwise ``None``.

    Network failures (connection errors, timeouts) are reported the same way
    as non-200 statuses so that callers have a single failure signal.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    return response


def get_region_species_list() -> Optional[List[str]]:
    """Return the decoded species list for ``regionCode``.

    Returns:
        The JSON payload of the eBird ``spplist`` endpoint (iterated by the
        caller as a list of species codes), or ``None`` if the request fails
        or the body is not valid JSON.
    """
    url = f'https://api.ebird.org/v2/product/spplist/{regionCode}'
    response = _fetch(url, headers={'x-ebirdapitoken': ebird_api_key})
    if response is None:
        return None
    try:
        return response.json()
    except ValueError:
        return None


def get_species_name(taxonCode) -> Optional[str]:
    """Return the ``sciName`` of the first taxonomy entry for ``taxonCode``.

    Returns:
        The scientific name string, or ``None`` if the request fails or the
        payload has no first entry with a ``sciName`` key.
    """
    url = f'https://api.ebird.org/v2/ref/taxonomy/ebird?species={taxonCode}&fmt=json'
    response = _fetch(url)
    if response is None:
        return None
    try:
        return response.json()[0]['sciName']
    except (ValueError, LookupError, TypeError):
        # Invalid JSON, an empty list, a missing key, or an unexpected
        # payload shape all mean "no name available".
        return None


def get_image_url(regionCode, taxonCode) -> Optional[str]:
    """Return a CDN URL for the first photo asset found for ``taxonCode``.

    Args:
        regionCode: Accepted for call compatibility but not used; the search
            is deliberately not restricted to the region.
        taxonCode: Species code to search the Macaulay Library catalog for.

    Returns:
        The asset URL, or ``None`` if the page could not be fetched or no
        asset id was found in its scripts.
    """
    url = f"https://search.macaulaylibrary.org/catalog?taxonCode={taxonCode}&sort=rating_rank_desc&mediaType=photo"
    response = _fetch(url)
    if response is None:
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    # The asset ids are embedded in inline <script> data, so search the
    # stringified script tags rather than the rendered markup.
    match = _ASSET_ID_RE.search(str(soup.find_all('script')))
    if not match:
        return None

    asset_id = match.group(1)
    enlace = f"https://cdn.download.ams.birds.cornell.edu/api/v1/asset/{asset_id}"
    print(enlace)
    return enlace


def main():
    """Fetch all species for the region and write them to ``species.json``."""
    species_codes = get_region_species_list()
    if species_codes is None:
        # Previously this path crashed with a TypeError on len(None).
        raise SystemExit(f'could not fetch species list for region {regionCode}')

    total = len(species_codes)
    print('len of species codes: ', total)

    species = []
    try:
        for ind, species_code in enumerate(species_codes, start=1):
            print(ind, 'of', total)

            species_name = get_species_name(species_code)
            if not species_name:
                print('error with: ', species_code)

            image_url = get_image_url(regionCode, species_code)
            if not image_url:
                print('error with image url: ', species_code)

            species.append({'code': species_code, 'name': species_name, 'image_url': image_url})
    finally:
        # Persist whatever was collected, even if the run is interrupted
        # part-way through, without rewriting the file on every iteration.
        if species:
            with open('species.json', 'w', encoding='utf-8') as f:
                json.dump(species, f)

    print('len of species: ', len(species))


if __name__ == '__main__':
    main()