# Azure Machine Learning Deployment Guide

This guide provides step-by-step instructions for deploying the Image Description application to Azure Machine Learning.

## Prerequisites

- Azure subscription
- Azure CLI installed and configured
- Azure Machine Learning workspace
- The source code from this repository

## Step 1: Set Up Azure Machine Learning

1. Create a resource group (if you don't have one):

```bash
az group create --name image-descriptor-rg --location eastus
```

2. Create an Azure Machine Learning workspace:

```bash
az ml workspace create --name image-descriptor-ws \
  --resource-group image-descriptor-rg \
  --location eastus
```

## Step 2: Create a Compute Cluster

Create a GPU-enabled compute cluster for training and inference:

```bash
az ml compute create --name gpu-cluster \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg \
  --type AmlCompute \
  --min-instances 0 \
  --max-instances 1 \
  --size Standard_NC6s_v3
```

## Step 3: Prepare Environment Configuration

Create an `environment.yml` file to define dependencies:

```yaml
name: image_descriptor_env
channels:
  - pytorch
  - conda-forge
  - defaults
dependencies:
  - python=3.9
  - pip=23.0
  - pytorch=2.0.0
  - torchvision=0.15.0
  - pip:
      - transformers>=4.36.0
      - accelerate>=0.25.0
      - bitsandbytes>=0.41.0
      - safetensors>=0.4.0
      - flask>=2.3.2
      - flask-cors>=4.0.0
      - gunicorn>=21.2.0
      - pillow>=10.0.0
      - matplotlib>=3.7.0
      - python-dotenv>=1.0.0
      - azureml-core>=1.48.0
      - azureml-defaults>=1.48.0
      - inference-schema>=1.4.1
```
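If you prefer the Python SDK to the CLI, the same environment can also be registered programmatically. A minimal sketch, assuming the `azure-ai-ml` and `azure-identity` packages are installed; the environment name `image-descriptor-env` is a hypothetical choice and is not referenced elsewhere in this guide:

```python
# Sketch: register the Step 3 environment via the Python SDK instead of the CLI.
# The name "image-descriptor-env" is a hypothetical choice for illustration.
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Environment
from azure.identity import DefaultAzureCredential

ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="your-subscription-id",
    resource_group_name="image-descriptor-rg",
    workspace_name="image-descriptor-ws",
)

env = Environment(
    name="image-descriptor-env",
    conda_file="environment.yml",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:latest",
)
ml_client.environments.create_or_update(env)
```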
## Step 4: Create a Model Entry Script

Create a file called `score.py` to handle Azure ML model inference:

```python
import json
import io
import base64
import logging

import torch
from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global variables populated by init()
model = None
processor = None


def init():
    """Initialize the model when the service starts."""
    global model, processor

    logger.info("Loading model...")
    model_id = "Qwen/Qwen2-VL-7B"

    # The processor handles both image preprocessing and tokenization
    processor = AutoProcessor.from_pretrained(model_id)

    # Load the model with 4-bit quantization to reduce memory requirements
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        load_in_4bit=True,
        device_map="auto"
    )
    logger.info("Model loaded successfully")


def generate_description(image, prompt, max_new_tokens):
    """Generate one description for the image using the given prompt."""
    # Qwen2-VL expects chat-formatted input with an image placeholder
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": prompt},
        ],
    }]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[text], images=[image], return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)

    # Decode only the newly generated tokens, skipping the prompt
    generated = output[0][inputs.input_ids.shape[1]:]
    return processor.decode(generated, skip_special_tokens=True).strip()


def run(raw_data):
    """Process an image and generate descriptions.

    Args:
        raw_data: A JSON string containing the image as base64 encoded data
                  (key 'image_data') or as a URL (key 'image_url').

    Returns:
        A JSON string containing the descriptions.
    """
    try:
        # Parse input
        data = json.loads(raw_data)

        # Get the image data (from base64 or URL)
        if 'image_data' in data:
            image_bytes = base64.b64decode(data['image_data'])
            image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            logger.info("Loaded image from base64 data")
        elif 'image_url' in data:
            # Handle image URLs (for Azure Storage or public URLs)
            from urllib.request import urlopen
            with urlopen(data['image_url']) as response:
                image_bytes = response.read()
            image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
            logger.info(f"Loaded image from URL: {data['image_url']}")
        else:
            return json.dumps({"error": "No image data or URL provided"})

        # Generate one output per prompt
        basic_description = generate_description(
            image, "Describe this image briefly.", max_new_tokens=150)
        detailed_description = generate_description(
            image,
            "Analyze this image in detail. Describe the main elements, "
            "any text visible, the colors, and the overall composition.",
            max_new_tokens=300)
        technical_analysis = generate_description(
            image,
            "What can you tell me about the technical aspects of this image?",
            max_new_tokens=200)

        # Return the results
        return json.dumps({
            "success": True,
            "basic_description": basic_description,
            "detailed_description": detailed_description,
            "technical_analysis": technical_analysis
        })

    except Exception as e:
        logger.error(f"Error processing image: {str(e)}", exc_info=True)
        return json.dumps({"error": f"Error generating description: {str(e)}"})
```
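Before registering and deploying the model, it can save a debugging round-trip to smoke-test `score.py` locally in the environment from Step 3. A minimal sketch, assuming a GPU machine with enough memory and a local test image (`test_image.png` is a placeholder filename):

```python
# local_test.py -- minimal smoke test for score.py (not part of the deployment).
import base64
import json

import score  # assumes score.py is in the current directory

score.init()  # downloads and loads the model; requires a GPU with enough memory

# "test_image.png" is a placeholder; substitute any local image file.
with open("test_image.png", "rb") as f:
    payload = json.dumps({"image_data": base64.b64encode(f.read()).decode("utf-8")})

print(json.dumps(json.loads(score.run(payload)), indent=2))
```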
## Step 5: Register the Model

1. Create a `model.yml` file:

```yaml
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
name: qwen-vl-image-descriptor
version: 1
description: Qwen2-VL-7B model for image description
path: .
```

2. Register the model:

```bash
az ml model create --file model.yml \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```

## Step 6: Deploy as an Online Endpoint

1. Create an `endpoint.yml` file:

```yaml
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
name: image-descriptor-endpoint
description: Endpoint for image description
auth_mode: key
```

2. Create a `deployment.yml` file:

```yaml
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: qwen-vl-deployment
endpoint_name: image-descriptor-endpoint
model: azureml:qwen-vl-image-descriptor:1
environment:
  conda_file: environment.yml
  image: mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:latest
instance_type: Standard_NC6s_v3
instance_count: 1
request_settings:
  max_concurrent_requests_per_instance: 1
  request_timeout_ms: 120000
```

3. Create the endpoint:

```bash
az ml online-endpoint create --file endpoint.yml \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```

4. Create the deployment:

```bash
az ml online-deployment create --file deployment.yml \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```

5. Allocate 100% of traffic to the deployment:

```bash
az ml online-endpoint update --name image-descriptor-endpoint \
  --traffic "qwen-vl-deployment=100" \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg
```

## Step 7: Test the Endpoint

You can test the endpoint using the Azure ML SDK:

```python
import json
import base64

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Get a handle to the workspace
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id="your-subscription-id",
    resource_group_name="image-descriptor-rg",
    workspace_name="image-descriptor-ws"
)

# Verify the endpoint exists
endpoint = ml_client.online_endpoints.get("image-descriptor-endpoint")

# Load and encode the image
with open('data_temp/page_2.png', 'rb') as f:
    image_data = f.read()
image_b64 = base64.b64encode(image_data).decode('utf-8')

# Create the request payload; invoke() expects a file path, so write it to disk
payload = {'image_data': image_b64}
with open('request.json', 'w') as f:
    json.dump(payload, f)

# Invoke the endpoint
response = ml_client.online_endpoints.invoke(
    endpoint_name="image-descriptor-endpoint",
    request_file="request.json",
    deployment_name="qwen-vl-deployment"
)

# Parse the response
result = json.loads(response)
print(json.dumps(result, indent=2))
```

## Cost Optimization

To optimize costs:

1. Use a smaller compute size if possible
2. Scale the compute cluster to zero instances when not in use
3. Set up autoscaling rules
4. Consider reserved instances for long-term deployments

## Monitoring

Monitor your endpoint using:

1. Azure Monitor
2. Application Insights
3. The Azure ML metrics dashboard

Set up alerts so you are notified of anomalies.

## Cleanup

To avoid ongoing charges, delete resources when not in use:

```bash
# Delete the endpoint
az ml online-endpoint delete --name image-descriptor-endpoint \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg -y

# Delete compute cluster
az ml compute delete --name gpu-cluster \
  --workspace-name image-descriptor-ws \
  --resource-group image-descriptor-rg -y

# Delete workspace (optional)
az ml workspace delete --name image-descriptor-ws \
  --resource-group image-descriptor-rg -y

# Delete resource group (optional)
az group delete --name image-descriptor-rg -y
```
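After running the cleanup commands, you can confirm that nothing billable remains. A minimal sketch using the `azure-mgmt-resource` and `azure-identity` packages (only meaningful if you kept the resource group):

```python
# Sketch: list anything left in the resource group after cleanup.
from azure.identity import DefaultAzureCredential
from azure.mgmt.resource import ResourceManagementClient

client = ResourceManagementClient(DefaultAzureCredential(), "your-subscription-id")

# An empty listing means no billable resources remain in the group.
for resource in client.resources.list_by_resource_group("image-descriptor-rg"):
    print(resource.type, resource.name)
```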