File size: 3,486 Bytes
e7abd9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import json
import logging
import os
from datetime import datetime
from pathlib import Path

from dotenv import load_dotenv
from huggingface_hub import HfApi

from app.config.hf_config import HF_ORGANIZATION

# Get the backend directory path.
# This file lives two levels below the backend root (backend/<pkg>/<this file>).
BACKEND_DIR = Path(__file__).parent.parent
ROOT_DIR = BACKEND_DIR.parent

# Load environment variables from .env file in root directory
# (must run before os.getenv("HF_TOKEN") below).
load_dotenv(ROOT_DIR / ".env")

# Configure logging: INFO level, bare messages (no timestamp/level prefix)
# since this is an interactive CLI-style report.
logging.basicConfig(
    level=logging.INFO,
    format='%(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Hugging Face API.
# Fail fast at import time if the token is missing — every call below needs it.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not found in environment variables")
api = HfApi(token=HF_TOKEN)

def count_evaluated_models():
    """Count the number of evaluated models in the org's "contents" dataset.

    Downloads every ``.jsonl`` file in the ``{HF_ORGANIZATION}/contents``
    dataset repo and counts its lines (one entry per evaluated model).

    Returns:
        dict: on success, keys ``total_models``, ``last_modified``,
        ``file_count``, ``size_bytes`` and ``downloads``; on failure,
        a dict with a single ``error`` key.
    """
    try:
        # Get dataset info (size, download count)
        dataset_info = api.dataset_info(repo_id=f"{HF_ORGANIZATION}/contents", repo_type="dataset")

        # Get file list
        files = api.list_repo_files(f"{HF_ORGANIZATION}/contents", repo_type="dataset")

        # Get last commit info. list_repo_commits returns a list (newest
        # first), not an iterator — next() on it would raise TypeError.
        commits = api.list_repo_commits(f"{HF_ORGANIZATION}/contents", repo_type="dataset")
        last_commit = commits[0] if commits else None

        # Count lines in jsonl files
        total_entries = 0
        for file in files:
            if not file.endswith('.jsonl'):
                continue
            try:
                # Download file content (returns a local cached path)
                content = api.hf_hub_download(
                    repo_id=f"{HF_ORGANIZATION}/contents",
                    filename=file,
                    repo_type="dataset"
                )

                # Count lines; be explicit about encoding for portability
                with open(content, 'r', encoding='utf-8') as f:
                    total_entries += sum(1 for _ in f)

            except Exception as e:
                # Best-effort: skip unreadable files but keep counting
                logger.error(f"Error processing file {file}: {str(e)}")
                continue

        # Build response
        response = {
            "total_models": total_entries,
            # NOTE(review): created_at is a datetime object on current
            # huggingface_hub versions — callers formatting this should
            # not assume an ISO string. TODO confirm against hub version.
            "last_modified": last_commit.created_at if last_commit else None,
            "file_count": len(files),
            "size_bytes": dataset_info.size_in_bytes,
            "downloads": dataset_info.downloads
        }

        return response

    except Exception as e:
        logger.error(f"Error counting evaluated models: {str(e)}")
        return {
            "error": str(e)
        }

def main():
    """Run the analysis and log a human-readable summary.

    Returns:
        dict: the result of count_evaluated_models(), or an
        ``{"error": ...}`` dict if something failed globally.
    """
    try:
        logger.info("\nAnalyzing evaluated models...")
        result = count_evaluated_models()

        if 'error' in result:
            logger.error(f"❌ Error: {result['error']}")
        else:
            logger.info(f"βœ“ {result['total_models']} models evaluated")
            logger.info(f"βœ“ {result['file_count']} files")
            logger.info(f"βœ“ {result['size_bytes'] / 1024:.1f} KB")
            logger.info(f"βœ“ {result['downloads']} downloads")

            if result['last_modified']:
                raw = result['last_modified']
                # Fix: `datetime` was used here without being imported,
                # raising NameError (then swallowed as "Global error").
                # Also accept a datetime directly — the hub API returns
                # commit dates as datetime objects, not ISO strings.
                if isinstance(raw, datetime):
                    last_modified = raw
                else:
                    last_modified = datetime.fromisoformat(str(raw).replace('Z', '+00:00'))
                logger.info(f"βœ“ Last modified: {last_modified.strftime('%Y-%m-%d %H:%M:%S')}")

        return result

    except Exception as e:
        logger.error(f"Global error: {str(e)}")
        return {"error": str(e)}

# Script entry point: run the analysis when executed directly.
if __name__ == "__main__":
    main()