|
import gradio as gr |
|
from PIL import Image |
|
import os |
|
import time |
|
import numpy as np |
|
import torch |
|
import warnings |
|
import stat |
|
import subprocess |
|
import sys |
|
import asyncio |
|
import nest_asyncio |
|
|
|
|
|
# Patch asyncio (via nest_asyncio) so an event loop can be re-entered.
nest_asyncio.apply()

# PyTorch CUDA caching-allocator configuration, passed through the environment.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

# Startup diagnostics: interpreter, PyTorch and CUDA versions, one per line.
print(
    f"Python version: {sys.version}",
    f"PyTorch version: {torch.__version__}",
    f"CUDA available via PyTorch: {torch.cuda.is_available()}",
    f"CUDA version via PyTorch: {torch.version.cuda if torch.cuda.is_available() else 'Not available'}",
    sep="\n",
)
|
|
|
|
|
def run_nvidia_smi(timeout=30):
    """Run ``nvidia-smi`` and print what it reports.

    Args:
        timeout: Seconds to wait for the command before giving up. ``None``
            waits indefinitely. A limit is applied by default so that a
            hanging ``nvidia-smi`` cannot block start-up forever.

    Returns:
        bool: True when the command exits with status 0. False when it exits
        with a non-zero status, cannot be started, times out, or its output
        cannot be decoded.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=timeout,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # OSError: binary missing or not executable.
        # SubprocessError: includes TimeoutExpired.
        # ValueError: includes output that cannot be decoded as text.
        print(f"Error running nvidia-smi: {str(e)}")
        return False

    if result.returncode == 0:
        print("nvidia-smi output:")
        print(result.stdout)
        return True

    print("nvidia-smi error:")
    print(result.stderr)
    return False
|
|
|
|
|
# Remember whether nvidia-smi ran successfully; the UI reports this later.
nvidia_smi_available = run_nvidia_smi()
print(f"nvidia-smi available: {nvidia_smi_available}")

# When PyTorch can see CUDA, list every visible device and the current one.
if torch.cuda.is_available():
    print(f"CUDA device count: {torch.cuda.device_count()}")
    i = 0
    while i < torch.cuda.device_count():
        print(f"CUDA Device {i}: {torch.cuda.get_device_name(i)}")
        i += 1
    print(f"Current CUDA device: {torch.cuda.current_device()}")
|
|
|
|
|
def setup_cache_directories():
    """Create the cache directories under the current working directory.

    The following directories are created if missing:

    * ``gradio_cached_examples``
    * ``.cache/huggingface``
    * ``.cache/huggingface/transformers``

    Each one is then given read/write/execute permission for user, group and
    others. A permission failure on one directory is reported and does not
    stop the remaining directories from being processed.

    Returns:
        str: Path of the ``gradio_cached_examples`` directory.
    """
    cache_dir = os.path.join(os.getcwd(), "gradio_cached_examples")
    hf_cache = os.path.join(os.getcwd(), ".cache", "huggingface")
    transformers_cache = os.path.join(hf_cache, "transformers")

    # NOTE(review): rwx for everyone makes these directories world-writable.
    # Kept as in the original; confirm this is required by the deployment.
    open_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO

    for directory in (cache_dir, hf_cache, transformers_cache):
        os.makedirs(directory, exist_ok=True)
        try:
            os.chmod(directory, open_mode)
        except OSError as e:
            # Best effort: report and carry on with the next directory.
            print(f"Warning: Could not set permissions: {str(e)}")
        else:
            print(f"Set permissions for {directory}")

    return cache_dir
|
|
|
|
|
# Create the cache directories at import time and keep the Gradio cache path.
cache_dir = setup_cache_directories()

# Warning filters: two message-based filters, then all UserWarning instances.
warnings.filterwarnings(
    action="ignore",
    message=".*The 'nopython' keyword.*",
)
warnings.filterwarnings(
    action="ignore",
    message=".*Torch is not compiled with CUDA enabled.*",
)
warnings.filterwarnings(action="ignore", category=UserWarning)
|
|
|
|
|
def check_gpu_availability():
    """Check that a CUDA device is present and can execute work.

    Beyond asking PyTorch whether CUDA is available, this creates a small
    tensor on the device, performs an addition and waits for it to finish.

    Returns:
        bool: True if the CUDA operation completed; False if CUDA is
        unavailable or any step raised an exception.
    """
    print("Checking GPU availability...")

    if not torch.cuda.is_available():
        print("CUDA is not available in PyTorch")
        return False

    try:
        print("Attempting to create a tensor on CUDA...")
        x = torch.rand(10, device="cuda")
        x + x
        # CUDA kernels are queued asynchronously; wait for them so that a
        # failing kernel raises here, inside the try, rather than later.
        torch.cuda.synchronize()
    except Exception as e:
        print(f"GPU initialization failed: {str(e)}")
        return False

    print("Successfully created and operated on CUDA tensor")
    return True
|
|
|
|
|
# Shared model state; populated by load_internvl2_model().
internvl2_pipeline = None
MODEL_LOADED = False

# Probe the GPU once at import time; the rest of the module reads this flag.
USE_GPU = check_gpu_availability()

print(
    "GPU is available and working properly"
    if USE_GPU
    else "WARNING: GPU is not available or not working properly. This application requires GPU acceleration."
)

# lmdeploy is optional at import time: record whether it could be imported.
try:
    from lmdeploy import pipeline, TurbomindEngineConfig
except ImportError as e:
    LMDEPLOY_AVAILABLE = False
    print(f"lmdeploy import failed: {str(e)}. Will use a placeholder for demos.")
else:
    LMDEPLOY_AVAILABLE = True
    print("Successfully imported lmdeploy")

# Model identifier handed to lmdeploy's pipeline().
MODEL_ID = "OpenGVLab/InternVL2-40B-AWQ"
|
|
|
def load_internvl2_model():
    """Create the shared lmdeploy pipeline for ``MODEL_ID`` if needed.

    Updates the module globals ``internvl2_pipeline`` and ``MODEL_LOADED``.

    Returns:
        bool: True if a pipeline already exists or was created by this call;
        False if lmdeploy is unavailable, the GPU check failed, or creating
        the pipeline raised an exception.
    """
    global internvl2_pipeline, MODEL_LOADED

    # A pipeline built by an earlier call is reused as-is.
    if internvl2_pipeline is not None:
        return True

    # Preconditions, checked in order: (is_unmet, message to print).
    preconditions = (
        (not LMDEPLOY_AVAILABLE, "lmdeploy not available. Using demo placeholder."),
        (not USE_GPU, "Cannot load InternVL2 model without GPU acceleration."),
    )
    for is_unmet, message in preconditions:
        if is_unmet:
            print(message)
            MODEL_LOADED = False
            return False

    print("Loading InternVL2 model...")
    try:
        # NOTE(review): model_name_or_path, backend_name and stream are
        # forwarded unchanged from the original call; confirm that the
        # installed lmdeploy version accepts them.
        internvl2_pipeline = pipeline(
            MODEL_ID,
            backend_config=TurbomindEngineConfig(model_format='awq'),
            log_level='INFO',
            model_name_or_path=None,
            backend_name="turbomind",
            stream=False,
        )
        print("InternVL2 model loaded successfully!")
        MODEL_LOADED = True
        return True
    except Exception as e:
        reason = str(e)
        print(f"Error loading InternVL2 model: {reason}")
        # Add a hint for the two failure messages recognised here.
        if "CUDA out of memory" in reason:
            print("Not enough GPU memory for the model")
        elif "Found no NVIDIA driver" in reason:
            print("NVIDIA GPU driver not found or not properly configured")
        MODEL_LOADED = False
        return False
|
|
|
def analyze_image(image, prompt):
    """Run ``prompt`` against ``image`` with the InternVL2 pipeline.

    Args:
        image: Either a ``numpy.ndarray`` or an object exposing a PIL-style
            ``convert`` method. It is converted to an RGB PIL image.
        prompt: Instruction text sent to the model together with the image.

    Returns:
        str: The model's answer (``response.text`` when present, otherwise
        ``str(response)``). A human-readable message is returned instead when
        lmdeploy is unavailable, the GPU check failed, the model could not be
        loaded, or any step raised an exception. Exceptions are reported in
        the returned string rather than propagated.
    """
    # Local import keeps this block self-contained.
    from concurrent.futures import ThreadPoolExecutor

    try:
        if not LMDEPLOY_AVAILABLE:
            return ("This is a demo placeholder. The actual model couldn't be loaded because lmdeploy "
                    "is not properly installed. Check your installation and dependencies.")

        if not USE_GPU:
            return ("ERROR: This application requires a GPU to run InternVL2. "
                    "The NVIDIA driver was not detected on this system. "
                    "Please make sure this Space is using a GPU-enabled instance and that the GPU is correctly initialized.")

        if not load_internvl2_model():
            return "Couldn't load InternVL2 model. See logs for details."

        if isinstance(image, np.ndarray):
            image_pil = Image.fromarray(image).convert('RGB')
        else:
            image_pil = image.convert('RGB')

        def run_inference():
            return internvl2_pipeline((prompt, image_pil))

        # Is an event loop currently running in this thread?
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            loop_is_running = False
        else:
            loop_is_running = True

        if loop_is_running:
            print("Using threaded execution for model inference")
            # loop.run_in_executor() returns an asyncio Future whose result()
            # raises InvalidStateError until the loop has processed it. This
            # function is synchronous, so use a concurrent.futures Future
            # instead: its result() blocks until the worker has finished.
            with ThreadPoolExecutor(max_workers=1) as executor:
                response = executor.submit(run_inference).result()
        else:
            print("Using standard execution for model inference")
            response = run_inference()

        return response.text if hasattr(response, "text") else str(response)

    except Exception as e:
        print(f"Error in image analysis: {str(e)}")
        # Release cached GPU memory after a failed attempt.
        if USE_GPU:
            torch.cuda.empty_cache()
        return f"Error in image analysis: {str(e)}"
|
|
|
# Prompt used for "general" and for any unrecognised analysis type.
_DEFAULT_PROMPT = "Describe this image in detail."

# Analysis type -> prompt sent to the model.
_ANALYSIS_PROMPTS = {
    "general": _DEFAULT_PROMPT,
    "text": "What text can you see in this image? Please transcribe it accurately.",
    "chart": "Analyze any charts, graphs or diagrams in this image in detail, including trends, data points, and conclusions.",
    "people": "Describe the people in this image - their appearance, actions, and expressions.",
    "technical": "Provide a technical analysis of this image, including object identification, spatial relationships, and any technical elements present.",
}


def process_image(image, analysis_type="general"):
    """Analyze ``image`` with the prompt for ``analysis_type``.

    Args:
        image: Image to analyze; ``None`` means nothing was uploaded.
        analysis_type: One of "general", "text", "chart", "people" or
            "technical". Any other value falls back to the general prompt.

    Returns:
        str: "Please upload an image." when ``image`` is None; otherwise the
        text returned by ``analyze_image`` followed by the elapsed time.
    """
    if image is None:
        return "Please upload an image."

    # Non-string values (including unhashable ones) use the default prompt,
    # matching the fall-through of an if/elif chain.
    if isinstance(analysis_type, str):
        prompt = _ANALYSIS_PROMPTS.get(analysis_type, _DEFAULT_PROMPT)
    else:
        prompt = _DEFAULT_PROMPT

    # perf_counter is monotonic, so the duration cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()
    analysis = analyze_image(image, prompt)
    elapsed_time = time.perf_counter() - started

    return f"{analysis}\n\nAnalysis completed in {elapsed_time:.2f} seconds."
|
|
|
|
|
def create_interface():
    """Build the Gradio Blocks interface for image analysis.

    The page shows system diagnostics, warnings when lmdeploy or the GPU is
    unavailable, an image input with an analysis-type selector, and a text
    output wired to ``process_image``. When the GPU check failed, the submit
    button is created non-interactive and the output box is pre-filled with
    an error report.

    Returns:
        The assembled ``gr.Blocks`` object, not yet launched.
    """
    system_info = (
        "\n## System Diagnostics:\n"
        f"- PyTorch Version: {torch.__version__}\n"
        f"- CUDA Available: {torch.cuda.is_available()}\n"
        f"- GPU Working: {USE_GPU}\n"
        f"- nvidia-smi Available: {nvidia_smi_available}\n"
    )

    gpu_error_text = (
        "ERROR: NVIDIA GPU driver not detected. This application requires GPU acceleration to run the InternVL2 model.\n"
        "\n"
        "Diagnostics:\n"
        f"- PyTorch Version: {torch.__version__}\n"
        f"- CUDA Available via PyTorch: {torch.cuda.is_available()}\n"
        f"- nvidia-smi Available: {nvidia_smi_available}\n"
        f"- GPU Working: {USE_GPU}\n"
        "\n"
        "Please ensure this Space is using a GPU-enabled instance and that the GPU is correctly initialized."
    )

    analysis_types_md = "\n".join([
        "",
        "## Analysis Types",
        "- **General**: General description of the image",
        "- **Text**: Focus on identifying and transcribing text in the image",
        "- **Chart**: Detailed analysis of charts, graphs, and diagrams",
        "- **People**: Description of people, their appearance and actions",
        "- **Technical**: Technical analysis identifying objects and spatial relationships",
        "",
    ])

    requirements_md = "\n".join([
        "",
        "## System Requirements",
        "This application requires:",
        "- NVIDIA GPU with CUDA support",
        "- At least 16GB of GPU memory recommended",
        "- GPU drivers properly installed and configured",
        "",
        "If you're running this on Hugging Face Spaces, make sure to select a GPU-enabled hardware type.",
        "",
    ])

    # Set the initial value only when there is an error to show, so the
    # Textbox keeps its own default otherwise.
    output_kwargs = {} if USE_GPU else {"value": gpu_error_text}

    # Caching examples runs process_image on every example. Do that only
    # when inference can actually run, so error messages are never cached
    # as example outputs.
    can_run_model = bool(USE_GPU and LMDEPLOY_AVAILABLE)

    with gr.Blocks(title="Image Analysis with InternVL2") as demo:
        gr.Markdown("# Image Analysis with InternVL2-40B")
        gr.Markdown(system_info)
        gr.Markdown("Upload an image to analyze it using the InternVL2-40B model.")

        if not LMDEPLOY_AVAILABLE:
            gr.Markdown("⚠️ **WARNING**: lmdeploy is not properly installed. This demo will not function correctly.", elem_classes=["warning-message"])

        if not USE_GPU:
            gr.Markdown("🚫 **ERROR**: NVIDIA GPU not detected. This application requires GPU acceleration to run InternVL2 model.", elem_classes=["error-message"])

        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(type="pil", label="Upload Image")
                analysis_type = gr.Radio(
                    ["general", "text", "chart", "people", "technical"],
                    label="Analysis Type",
                    value="general",
                )
                # Configure interactivity at construction time instead of
                # mutating the component afterwards.
                submit_btn = gr.Button("Analyze Image", interactive=bool(USE_GPU))

            with gr.Column(scale=2):
                output_text = gr.Textbox(label="Analysis Result", lines=20, **output_kwargs)

        submit_btn.click(
            fn=process_image,
            inputs=[input_image, analysis_type],
            outputs=output_text,
        )

        gr.Markdown(analysis_types_md)
        gr.Markdown(requirements_md)

        example_image = "data_temp/page_2.png"
        if os.path.exists(example_image):
            try:
                gr.Examples(
                    examples=[
                        [example_image, "general"],
                        [example_image, "text"],
                        [example_image, "chart"],
                    ],
                    inputs=[input_image, analysis_type],
                    outputs=output_text,
                    fn=process_image,
                    cache_examples=can_run_model,
                )
            except Exception as e:
                # Examples are optional; the rest of the UI still works.
                print(f"Warning: Could not load examples: {str(e)}")
        else:
            print(f"Warning: Could not load examples: {example_image} not found")

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Build the UI, then serve it on all network interfaces without a
    # public share link.
    demo = create_interface()
    demo.launch(server_name="0.0.0.0", share=False)