Spaces:

mknolan
/

cursor_slides_internvl2

Paused

App Files Files Community

cursor_slides_internvl2 / simple_gpu_app.py

mknolan

Upload InternVL2 implementation

02532a9 verified 4 months ago

raw

history blame

3.33 kB

	import gradio as gr
	import torch
	from transformers import AutoProcessor, AutoModelForVision2Seq
	from PIL import Image

	# Log the runtime environment at startup: the PyTorch build and whether
	# CUDA is usable. The availability check is done once and reused.
	_cuda_ready = torch.cuda.is_available()
	print(f"PyTorch version: {torch.__version__}")
	print(f"CUDA available: {_cuda_ready}")
	if _cuda_ready:
	    # Only device index 0 is reported.
	    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

	# Checkpoint to serve. The original author's note describes it as a small
	# (~1 GB), very reliable model picked to work even with limited resources.
	model_id = "Salesforce/blip-image-captioning-base"

	# Prefer the GPU whenever PyTorch can see one; otherwise run on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"
	print(f"Using device: {device}")

	# Module-level slots for the processor and model. Both stay None until
	# load_model() assigns them.
	processor = model = None

	def load_model() -> bool:
	    """Load the processor and model for ``model_id`` into the module globals.

	    Both objects are loaded into local variables first. The globals
	    ``processor`` and ``model`` are assigned together, and only after every
	    step (including moving the model to ``device``) has succeeded, so a
	    failed load never leaves one global populated while the other is None.

	    Returns:
	        True when both globals were populated; False when loading raised.
	        The error is printed rather than propagated.
	    """
	    global processor, model

	    print("Loading model and processor...")
	    try:
	        loaded_processor = AutoProcessor.from_pretrained(model_id)
	        loaded_model = AutoModelForVision2Seq.from_pretrained(model_id).to(device)
	    except Exception as e:
	        # Intentionally broad: any failure here is reported to the caller as
	        # a False return instead of an exception.
	        print(f"Error loading model: {e}")
	        return False

	    # Commit both globals in one step now that nothing else can fail.
	    processor, model = loaded_processor, loaded_model
	    print("Model loaded successfully")
	    return True

	def analyze_image(image):
	    """Generate a caption for ``image`` and describe the device that ran it.

	    Args:
	        image: A PIL image, a path to an image file, or an array accepted by
	            ``Image.fromarray``. ``None`` is rejected with a message before
	            any model loading or inference is attempted.

	    Returns:
	        str: On success, the caption followed by the device in use (and, on
	        CUDA, the device name and allocated GPU memory in MB). Otherwise a
	        human-readable message: no image, model load failure, or the text of
	        the exception raised during inference.
	    """
	    global processor, model

	    # Nothing to caption. NOTE(review): this is presumably what the UI passes
	    # when the button is clicked without an upload - confirm against Gradio.
	    if image is None:
	        return "No image provided. Please upload an image first."

	    # Load the model on first use; a failed attempt is retried on the next call.
	    if model is None:
	        if not load_model():
	            return "Failed to load model. Check logs for details."

	    try:
	        # Bring every supported input kind to an RGB PIL image.
	        if isinstance(image, str):
	            # File path: close the file handle once the pixels are converted.
	            with Image.open(image) as opened:
	                image = opened.convert("RGB")
	        elif isinstance(image, Image.Image):
	            # PIL input may be RGBA, palette or grayscale.
	            image = image.convert("RGB")
	        else:
	            # Anything else is treated as an array.
	            image = Image.fromarray(image).convert("RGB")

	        # Preprocess and move the tensors to the same device as the model.
	        inputs = processor(images=image, return_tensors="pt").to(device)

	        # Gradients are not needed for generation.
	        with torch.no_grad():
	            output = model.generate(**inputs, max_length=100)

	        caption = processor.decode(output[0], skip_special_tokens=True)

	        if device == "cuda":
	            # Bytes -> MB.
	            memory_info = torch.cuda.memory_allocated() / 1024**2
	            return f"Caption: {caption}\n\nUsing device: {device} ({torch.cuda.get_device_name(0)})\nGPU memory used: {memory_info:.2f} MB"
	        return f"Caption: {caption}\n\nUsing device: {device}"

	    except Exception as e:
	        # Report the failure in the returned text instead of raising.
	        print(f"Error during inference: {e}")
	        return f"Error during inference: {str(e)}"

	# Assemble the Gradio UI: a title, then a row with two columns. The first
	# column holds the image upload, the button and a GPU status line; the
	# second column holds the result textbox.
	with gr.Blocks(title="Simple GPU Test") as demo:
	    gr.Markdown("# Simple GPU Test with BLIP Image Captioning")

	    with gr.Row():
	        with gr.Column():
	            image_input = gr.Image(type="pil", label="Upload an image")
	            submit_btn = gr.Button("Generate Caption")

	            # Pick the status text first, then render it with one Markdown call.
	            if torch.cuda.is_available():
	                _gpu_status = f"✅ GPU detected: {torch.cuda.get_device_name(0)}"
	            else:
	                _gpu_status = "❌ No GPU detected. Running on CPU."
	            gr.Markdown(_gpu_status)

	        with gr.Column():
	            output_text = gr.Textbox(label="Result", lines=5)

	    # Clicking the button passes the uploaded image to analyze_image and
	    # writes its return value into the result textbox.
	    submit_btn.click(fn=analyze_image, inputs=[image_input], outputs=[output_text])

	# Start the server only when this file is executed as a script. The
	# 0.0.0.0 address makes it listen on all network interfaces.
	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0")