import torch
import os
import sys
import traceback
import requests
import platform
print("=" * 50)
print("DETAILED MODEL LOADING DIAGNOSTIC")
print("=" * 50)
# System information
print("\n1. SYSTEM INFORMATION:")
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"Platform: {platform.platform()}")
print(f"Processor: {platform.processor()}")
# Environment variables
print("\n2. ENVIRONMENT VARIABLES:")
relevant_vars = ["CUDA_VISIBLE_DEVICES", "NVIDIA_VISIBLE_DEVICES", "TRANSFORMERS_CACHE", "HF_HOME"]
for var in relevant_vars:
print(f"{var}: {os.environ.get(var, 'Not set')}")
# GPU information
print("\n3. GPU DETECTION:")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    try:
        print(f"CUDA version: {torch.version.cuda}")
        print(f"GPU count: {torch.cuda.device_count()}")
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

        # Test GPU with a simple operation
        print("\nTesting GPU with tensor operations...")
        test_tensor = torch.rand(1000, 1000, device="cuda")
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        result = torch.matmul(test_tensor, test_tensor)
        end.record()
        torch.cuda.synchronize()
        print(f"GPU tensor operation completed in {start.elapsed_time(end):.2f} ms")
# Memory info
print(f"\nTotal GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
print(f"Allocated GPU memory: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
print(f"Reserved GPU memory: {torch.cuda.memory_reserved() / 1e9:.2f} GB")
    except Exception as e:
        print(f"Error testing GPU: {str(e)}")
        traceback.print_exc()
else:
print("CUDA is not available. This is a critical issue for model loading.")
# HuggingFace hub connectivity
print("\n4. HUGGINGFACE HUB CONNECTIVITY:")
try:
    print("Testing connection to HuggingFace Hub...")
    response = requests.get(
        "https://huggingface.co/api/models/OpenGVLab/InternViT-6B-224px",
        timeout=10,  # avoid hanging indefinitely if the Hub is unreachable
    )
    if response.status_code == 200:
        print("Successfully connected to HuggingFace Hub")
        model_info = response.json()
        print("Model exists: OpenGVLab/InternViT-6B-224px")
        if 'downloads' in model_info:
            print(f"Downloads: {model_info['downloads']}")
    else:
        print(f"Failed to connect to HuggingFace Hub: Status code {response.status_code}")
        print(response.text)
except Exception as e:
    print(f"Error connecting to HuggingFace Hub: {str(e)}")
    traceback.print_exc()
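# Optional cross-check (a sketch, assuming the huggingface_hub package is
# importable; transformers depends on it): the official client also respects
# HF_TOKEN, so it can reach gated or private repos that plain requests cannot.
try:
    from huggingface_hub import HfApi
    info = HfApi().model_info("OpenGVLab/InternViT-6B-224px")
    print(f"huggingface_hub check OK, latest revision: {info.sha}")
except Exception as e:
    print(f"huggingface_hub check failed: {str(e)}")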
# Attempt model loading with detailed error capture
print("\n5. ATTEMPTING MODEL LOADING:")
try:
    print("Importing transformers...")
    from transformers import AutoModel, AutoProcessor
    print("✓ Transformers imported successfully")

    print("\nLoading AutoProcessor...")
    processor = AutoProcessor.from_pretrained("OpenGVLab/InternViT-6B-224px")
    print("✓ AutoProcessor loaded successfully")

    print("\nLoading AutoModel...")
    model = AutoModel.from_pretrained("OpenGVLab/InternViT-6B-224px")
    print("✓ AutoModel loaded successfully")

    if torch.cuda.is_available():
        print("\nMoving model to CUDA...")
        model = model.to("cuda")
        print("✓ Model moved to CUDA successfully")

    print("\nModel loading SUCCESSFUL")
    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
except Exception as e:
    print(f"\n❌ ERROR LOADING MODEL: {str(e)}")
    print("\nDetailed traceback:")
    traceback.print_exc()
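# If loading fails here, two hedged suggestions (assumptions, not verified by
# this script): the InternViT Hub repos ship custom modeling code, which
# transformers only executes when passed trust_remote_code=True, and a
# ~6B-parameter model needs roughly 24 GB in float32 (4 bytes/param) versus
# ~12 GB in bfloat16. A retry sketch under those assumptions:
#   model = AutoModel.from_pretrained(
#       "OpenGVLab/InternViT-6B-224px",
#       torch_dtype=torch.bfloat16,
#       trust_remote_code=True,
#   )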
print("\n" + "=" * 50)
print("DIAGNOSTIC COMPLETE")
print("=" * 50) |