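"""Count the evaluated models by reading the JSONL files stored in the
`contents` dataset of the configured Hugging Face organization."""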
import os
import json
import logging
from datetime import datetime
from pathlib import Path

from huggingface_hub import HfApi
from dotenv import load_dotenv

from app.config.hf_config import HF_ORGANIZATION
# Get the backend directory path
BACKEND_DIR = Path(__file__).parent.parent
ROOT_DIR = BACKEND_DIR.parent

# Load environment variables from .env file in root directory
load_dotenv(ROOT_DIR / ".env")

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Hugging Face API
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not found in environment variables")
api = HfApi(token=HF_TOKEN)
def count_evaluated_models():
    """Count the number of evaluated models"""
    try:
        # Get dataset info (dataset_info already implies repo_type="dataset")
        dataset_info = api.dataset_info(repo_id=f"{HF_ORGANIZATION}/contents")

        # Get file list
        files = api.list_repo_files(f"{HF_ORGANIZATION}/contents", repo_type="dataset")

        # Get last commit info (list_repo_commits returns a list, newest first)
        commits = api.list_repo_commits(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
        last_commit = commits[0] if commits else None

        # Count lines in jsonl files
        total_entries = 0
        for file in files:
            if file.endswith('.jsonl'):
                try:
                    # Download file content
                    content = api.hf_hub_download(
                        repo_id=f"{HF_ORGANIZATION}/contents",
                        filename=file,
                        repo_type="dataset"
                    )

                    # Count lines
                    with open(content, 'r') as f:
                        for _ in f:
                            total_entries += 1
                except Exception as e:
                    logger.error(f"Error processing file {file}: {str(e)}")
                    continue

        # Build response (created_at is a datetime; store it as an ISO string
        # so it can be serialized and parsed downstream)
        response = {
            "total_models": total_entries,
            "last_modified": last_commit.created_at.isoformat() if last_commit else None,
            "file_count": len(files),
            "size_bytes": dataset_info.size_in_bytes,
            "downloads": dataset_info.downloads
        }

        return response

    except Exception as e:
        logger.error(f"Error counting evaluated models: {str(e)}")
        return {
            "error": str(e)
        }
def main():
    """Main function to count evaluated models"""
    try:
        logger.info("\nAnalyzing evaluated models...")
        result = count_evaluated_models()

        if 'error' in result:
            logger.error(f"✗ Error: {result['error']}")
        else:
            logger.info(f"✓ {result['total_models']} models evaluated")
            logger.info(f"✓ {result['file_count']} files")
            logger.info(f"✓ {result['size_bytes'] / 1024:.1f} KB")
            logger.info(f"✓ {result['downloads']} downloads")
            if result['last_modified']:
                last_modified = datetime.fromisoformat(result['last_modified'].replace('Z', '+00:00'))
                logger.info(f"✓ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")

        return result

    except Exception as e:
        logger.error(f"Global error: {str(e)}")
        return {"error": str(e)}


if __name__ == "__main__":
    main()