"""Download the latest CoinMarketCap listings and append them to a CSV history.

Configuration is read from environment variables (``load_dotenv`` is called
first, so they may live in a ``.env`` file):

* ``URL_CMC``     - base URL of the API, joined with the endpoint path.
* ``API_KEY_CMC`` - value sent in the ``X-CMC_PRO_API_KEY`` header.
* ``LOG_FOLDER``  - directory that receives ``cmc_scrapping.log``.
"""

# ---------------------- Library Imports ----------------------
import functools
import json
import logging
import os
import time

import numpy as np
import pandas as pd
import requests
from dotenv import load_dotenv
from requests import Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects

# ---------------------- Environment Variables ----------------------
load_dotenv()

url_cmc = os.getenv("URL_CMC")
api_key_cmc = os.getenv("API_KEY_CMC")
# Fall back to the current directory: os.makedirs(None) would otherwise raise
# TypeError at import time when LOG_FOLDER is not set.
log_folder = os.getenv("LOG_FOLDER") or "."
os.makedirs(log_folder, exist_ok=True)

log_file = os.path.join(log_folder, "cmc_scrapping.log")
log_format = "%(asctime)s [%(levelname)s] - %(message)s"
logging.basicConfig(filename=log_file, level=logging.INFO, format=log_format)

# Directory that receives both the raw JSON dumps and the CSV history.
OUTPUT_FOLDER = "output"
# Seconds to wait for the API; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30


# ---------------------- Helper Functions ----------------------
def log_execution_time(func):
    """Decorate *func* so that each successful call logs its wall-clock duration.

    The duration is logged at INFO level after *func* returns; if *func*
    raises, the exception propagates and nothing is logged.
    """

    @functools.wraps(func)  # keep __name__/__doc__ of the wrapped function
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        logging.info(
            "Function %s executed in %.2f seconds", func.__name__, elapsed
        )
        return result

    return wrapper


def process_cmc_data(data, stop):
    """Flatten a listings payload into a table and hand it to ``save_cmc_data``.

    Args:
        data: Decoded JSON payload. ``data["data"]`` must be a list of records
            carrying ``name``, ``symbol``, ``circulating_supply``,
            ``total_supply`` and a ``quote`` mapping with a ``"USD"`` entry.
        stop: Label forwarded to ``save_cmc_data`` (used in the file name).

    Raises:
        KeyError: If an expected key or column is missing from the payload.
    """
    df = pd.DataFrame(data["data"])[
        ["name", "symbol", "circulating_supply", "total_supply", "quote"]
    ]

    # Build the quote table from a plain list of the per-record USD dicts so
    # it gets the same 0..n-1 index as ``df`` and the join below lines up.
    usd_quotes = [quote["USD"] for quote in df["quote"]]
    quote_df = pd.json_normalize(usd_quotes)[
        [
            "price",
            "percent_change_24h",
            "percent_change_7d",
            "percent_change_90d",
            "market_cap",
            "fully_diluted_market_cap",
            "last_updated",
        ]
    ]

    df = df.drop("quote", axis=1)
    # A zero total supply would yield +/-inf; treat it as unknown (NaN) instead.
    total_supply = df["total_supply"].replace(0, np.nan)
    df["percent_tokens_circulation"] = np.round(
        (df["circulating_supply"] / total_supply) * 100, 1
    )
    df = df.join(quote_df)
    df["last_updated"] = pd.to_datetime(df["last_updated"], utc=True)

    save_cmc_data(df, stop)


def save_cmc_data(df, stop):
    """Append *df* to ``output/top_{stop}_update.csv``, dropping repeated rows.

    Rows count as duplicates when both ``symbol`` and ``last_updated`` match.
    ``last_updated`` comes back from the CSV as text, so both the stored and
    the new values are converted to UTC datetimes before comparing; otherwise
    a string never equals a Timestamp and no duplicate is ever removed.

    Args:
        df: Table to persist; must contain ``symbol`` and ``last_updated``.
        stop: Label embedded in the output file name.
    """
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    output_file = os.path.join(OUTPUT_FOLDER, f"top_{stop}_update.csv")

    # ``assign`` returns a copy, so the caller's frame is left untouched.
    df = df.assign(last_updated=pd.to_datetime(df["last_updated"], utc=True))

    if os.path.isfile(output_file):
        existing_data = pd.read_csv(output_file)
        existing_data["last_updated"] = pd.to_datetime(
            existing_data["last_updated"], utc=True
        )
        updated_data = pd.concat([existing_data, df], axis=0, ignore_index=True)
        updated_data = updated_data.drop_duplicates(
            subset=["symbol", "last_updated"]
        )
        updated_data.to_csv(output_file, index=False)
    else:
        df.to_csv(output_file, index=False)

    logging.info("CMC data script execution completed.")


# ---------------------- CMC Scraping Function ----------------------
@log_execution_time
def fetch_and_process_cmc_data(limit=100):
    """Fetch the latest listings, dump the raw JSON and update the CSV history.

    Network errors, HTTP error statuses, undecodable bodies and payloads
    without a ``"data"`` key are logged and skipped rather than raised.

    Args:
        limit: Number of listings to request; also used in the output file
            names. Defaults to 100.
    """
    if not url_cmc or not api_key_cmc:
        logging.error("URL_CMC and API_KEY_CMC must be set; nothing fetched.")
        return

    limit = str(limit)
    parameters = {"start": "1", "limit": limit, "convert": "USD"}
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    # The context manager closes the session's connections when done.
    with Session() as session:
        session.headers.update(
            {
                # NOTE(review): the standard HTTP header is "Accept"; "Accepts"
                # is kept unchanged - confirm against the API documentation.
                "Accepts": "application/json",
                "X-CMC_PRO_API_KEY": api_key_cmc,
            }
        )

        for endpoint in ["v1/cryptocurrency/listings/latest"]:
            target = f"{url_cmc}/{endpoint}"
            try:
                response = session.get(
                    target, params=parameters, timeout=REQUEST_TIMEOUT
                )
                response.raise_for_status()
                data = json.loads(response.text)
            except (
                ConnectionError,
                Timeout,
                TooManyRedirects,
                requests.exceptions.HTTPError,
                ValueError,  # body is not valid JSON
            ) as e:
                logging.error(f"Error while fetching data from {target}: {e}")
                continue

            endpoint_slug = endpoint.replace("/", "_")
            raw_file = os.path.join(
                OUTPUT_FOLDER, f"cmc_data_{endpoint_slug}_{limit}.json"
            )
            with open(raw_file, "w", encoding="utf-8") as f:
                json.dump(data, f)

            if not isinstance(data, dict) or "data" not in data:
                logging.error(f"Response from {target} has no 'data' field.")
                continue

            process_cmc_data(data, limit)


# ---------------------- Execution ----------------------
if __name__ == "__main__":
    fetch_and_process_cmc_data()