mknolan committed on
Commit
76655cb
·
verified ·
1 Parent(s): 106a1b4

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +128 -0
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import os
3
+ import sys
4
+ import traceback
5
+ import gradio as gr
6
+ from PIL import Image
7
+ from transformers import AutoModel, CLIPImageProcessor
8
+
9
# Startup banner, printed once when the module is imported.
banner_rule = "=" * 50
print(banner_rule)
print("INTERNVIT-6B MODEL LOADING TEST (NO FLASH-ATTN)")
print(banner_rule)

# System information: interpreter, PyTorch build and CUDA visibility.
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")

if not cuda_ready:
    print("CUDA is not available. This is a critical issue for model loading.")
else:
    gpu_total = torch.cuda.device_count()
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU count: {gpu_total}")
    for gpu_index in range(gpu_total):
        print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")

    # Memory info. The total is read from device 0 only; the allocated and
    # reserved figures are queried without a device argument.
    print(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"Allocated GPU memory: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    print(f"Reserved GPU memory: {torch.cuda.memory_reserved() / 1e9:.2f} GB")
30
+
31
+ # Create a function to load and test the model
32
def load_and_test_model():
    """Load InternViT-6B, push one dummy image through it, and report the result.

    A stand-in ``flash_attn`` module is registered in ``sys.modules`` first so
    that importing the name does not fail when the real package is absent.
    The model is then loaded in bfloat16, moved to CUDA when available, and
    run once on a solid red 224x224 image.

    Returns:
        str: A message starting with ``"SUCCESS:"`` that carries the parameter
        count and the shape of ``last_hidden_state``, or a message starting
        with ``"FAILED:"`` that carries the error text. Exceptions derived
        from ``Exception`` are caught, printed with a traceback, and reported
        through the return value instead of propagating.
    """
    try:
        # Only needed to build the stand-in module below.
        import importlib.machinery
        import types

        # Monkey patch to disable flash attention: register an empty module
        # under the name "flash_attn" (this replaces any existing entry).
        flash_attn_module = types.ModuleType("flash_attn")
        flash_attn_module.__version__ = "0.0.0-disabled"
        # A bare ModuleType has __spec__ = None, and importlib.util.find_spec()
        # raises ValueError for a sys.modules entry in that state. Attach a
        # spec so code that probes for the package gets an answer instead of
        # an exception.
        flash_attn_module.__spec__ = importlib.machinery.ModuleSpec(
            "flash_attn", loader=None
        )
        sys.modules["flash_attn"] = flash_attn_module

        print("\nNOTE: Created dummy flash_attn module to avoid dependency error")
        print("This is just for testing basic model loading - some functionality may be disabled")

        print("\nLoading model with bfloat16 precision and low_cpu_mem_usage=True...")
        model = AutoModel.from_pretrained(
            "OpenGVLab/InternViT-6B-224px",
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        )

        if torch.cuda.is_available():
            print("Moving model to CUDA...")
            model = model.cuda()

        model.eval()
        print("✓ Model loaded successfully!")

        # Now try to process a test image
        print("\nLoading image processor...")
        image_processor = CLIPImageProcessor.from_pretrained("OpenGVLab/InternViT-6B-224px")
        print("✓ Image processor loaded successfully!")

        # Create a simple test image
        print("\nCreating test image...")
        test_image = Image.new("RGB", (224, 224), color="red")

        # Process the test image
        print("Processing test image...")
        pixel_values = image_processor(images=test_image, return_tensors="pt").pixel_values

        # Always convert to bfloat16 first so the input matches the dtype the
        # model was loaded with, then optionally move it to CUDA.
        print("Converting image tensor to bfloat16 to match model dtype...")
        pixel_values = pixel_values.to(torch.bfloat16)

        if torch.cuda.is_available():
            print("Moving image tensor to CUDA...")
            pixel_values = pixel_values.cuda()

        # Get model parameters
        params = sum(p.numel() for p in model.parameters())
        print(f"Model parameters: {params:,}")

        # Forward pass; no_grad because this is inference only.
        print("Running forward pass...")
        with torch.no_grad():
            outputs = model(pixel_values)

        print("✓ Forward pass successful!")
        print(f"Output shape: {outputs.last_hidden_state.shape}")

        return f"SUCCESS: Model loaded and test passed!\nParameters: {params:,}\nOutput shape: {outputs.last_hidden_state.shape}"

    except Exception as e:
        # UI boundary: log the full traceback to the console and hand the
        # caller a readable message rather than raising into the click handler.
        print(f"\n❌ ERROR: {e}")
        traceback.print_exc()
        return f"FAILED: Error loading model or processing image\nError: {e}"
99
+
100
+ # Create a simple Gradio interface
101
def create_interface():
    """Build the Gradio Blocks UI: two headings, a button and a results box.

    Clicking the button calls ``load_and_test_model`` with no inputs and
    writes the string it returns into the results textbox.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    app = gr.Blocks(title="InternViT-6B Test")
    with app:
        gr.Markdown("# InternViT-6B Model Loading Test (without Flash Attention)")
        gr.Markdown("### This version uses a dummy flash-attn implementation to avoid compilation issues")

        # Button and result box share one row.
        with gr.Row():
            run_button = gr.Button("Test Model Loading")
            results_box = gr.Textbox(label="Test Results", lines=10)

        # Event wiring has to happen while the Blocks context is still open.
        run_button.click(fn=load_and_test_model, inputs=[], outputs=results_box)

    return app
113
+
114
+ # Main function
115
if __name__ == "__main__":
    # Print the environment variables that influence GPU visibility, the
    # Hugging Face cache location and the CUDA allocator, as found on entry.
    print("\nEnvironment variables:")
    relevant_vars = [
        "CUDA_VISIBLE_DEVICES",
        "NVIDIA_VISIBLE_DEVICES",
        "TRANSFORMERS_CACHE",
        "HF_HOME",
        "PYTORCH_CUDA_ALLOC_CONF",
    ]
    for var in relevant_vars:
        print(f"{var}: {os.environ.get(var, 'Not set')}")

    # Set an allocator default for better GPU memory management. setdefault
    # keeps a value the operator already supplied (the one printed above)
    # instead of silently overwriting it.
    os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:128")

    # Create and launch the interface. "0.0.0.0" listens on all network
    # interfaces; share=False means no public Gradio share link is requested.
    demo = create_interface()
    demo.launch(share=False, server_name="0.0.0.0")