import os

# Configure the CUDA caching allocator before torch initializes CUDA.
# (The original script set this inside __main__, after CUDA had already been
# touched by the diagnostics below, where it has no effect. setdefault keeps
# any value supplied by the environment.)
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:128")

import sys
import traceback

import torch
import gradio as gr
from PIL import Image
from transformers import AutoModel, CLIPImageProcessor

print("=" * 50)
print("SIMPLE INTERNVIT-6B MODEL LOADING TEST")
print("=" * 50)

# System information
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU count: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
    # Memory info for GPU 0
    print(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"Allocated GPU memory: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    print(f"Reserved GPU memory: {torch.cuda.memory_reserved() / 1e9:.2f} GB")
else:
    print("CUDA is not available. This is a critical issue for model loading.")


# Load the model, run one forward pass on a synthetic image, and report the
# result. Note: the model is (re)loaded on every button click, which is fine
# for a one-shot loading test.
def load_and_test_model():
    try:
        print("\nLoading model with bfloat16 precision and low_cpu_mem_usage=True...")
        model = AutoModel.from_pretrained(
            'OpenGVLab/InternViT-6B-224px',
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            trust_remote_code=True)

        if torch.cuda.is_available():
            print("Moving model to CUDA...")
            model = model.cuda()

        model.eval()
        print("āœ“ Model loaded successfully!")

        # Load the matching image processor
        print("\nLoading image processor...")
        image_processor = CLIPImageProcessor.from_pretrained('OpenGVLab/InternViT-6B-224px')
        print("āœ“ Image processor loaded successfully!")

        # Create a simple test image
        print("\nCreating test image...")
        test_image = Image.new('RGB', (224, 224), color='red')

        # Preprocess the test image; cast to bfloat16 to match the model's dtype
        print("Processing test image...")
        pixel_values = image_processor(images=test_image, return_tensors='pt').pixel_values
        if torch.cuda.is_available():
            pixel_values = pixel_values.to(torch.bfloat16).cuda()

        # Count model parameters
        params = sum(p.numel() for p in model.parameters())
        print(f"Model parameters: {params:,}")

        # Forward pass
        print("Running forward pass...")
        with torch.no_grad():
            outputs = model(pixel_values)

        print("āœ“ Forward pass successful!")
        print(f"Output shape: {outputs.last_hidden_state.shape}")

        return (f"SUCCESS: Model loaded and test passed!\n"
                f"Parameters: {params:,}\n"
                f"Output shape: {outputs.last_hidden_state.shape}")

    except Exception as e:
        print(f"\nāŒ ERROR: {e}")
        traceback.print_exc()
        return f"FAILED: Error loading model or processing image\nError: {e}"


# Create a simple Gradio interface
def create_interface():
    with gr.Blocks(title="InternViT-6B Test") as demo:
        gr.Markdown("# InternViT-6B Model Loading Test")
        with gr.Row():
            test_btn = gr.Button("Test Model Loading")
        output = gr.Textbox(label="Test Results", lines=10)
        test_btn.click(fn=load_and_test_model, inputs=[], outputs=output)
    return demo


# Entry point
if __name__ == "__main__":
    # Print environment variables relevant to GPU visibility and model caching
    print("\nEnvironment variables:")
    relevant_vars = ["CUDA_VISIBLE_DEVICES", "NVIDIA_VISIBLE_DEVICES",
                     "TRANSFORMERS_CACHE", "HF_HOME", "PYTORCH_CUDA_ALLOC_CONF"]
    for var in relevant_vars:
        print(f"{var}: {os.environ.get(var, 'Not set')}")

    # Create and launch the interface
    demo = create_interface()
    demo.launch(share=False, server_name="0.0.0.0")
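
# Usage note (a sketch, assuming a fresh single-GPU environment; the filename
# below is illustrative, and the model's remote code may request additional
# packages such as einops or timm via an ImportError at load time):
#
#   pip install torch transformers gradio pillow
#   python internvit_test.py
#
# Gradio listens on port 7860 by default, so with server_name="0.0.0.0" the
# test UI is reachable at http://<host>:7860 from other machines as well.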