|
# Azure Machine Learning Deployment Guide |
|
|
|
This guide provides step-by-step instructions for deploying the Image Description application to Azure Machine Learning. |
|
|
|
## Prerequisites |
|
|
|
- Azure subscription |
|
- Azure CLI installed and configured |
|
- Azure Machine Learning workspace |
|
- The source code from this repository |
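
The `az ml` commands in this guide come from the Azure ML CLI v2 extension. A minimal setup sketch, assuming a fresh Azure CLI install:

```bash
# Install the Azure ML (v2) CLI extension that provides `az ml`
az extension add --name ml

# Sign in and select the subscription to deploy into
az login
az account set --subscription "<your-subscription-id>"
```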
|
|
|
## Step 1: Set Up Azure Machine Learning |
|
|
|
1. Create a Resource Group (if you don't have one): |
|
|
|
```bash
az group create --name image-descriptor-rg --location eastus
```
|
|
|
2. Create an Azure Machine Learning workspace: |
|
|
|
```bash
az ml workspace create --name image-descriptor-ws \
    --resource-group image-descriptor-rg \
    --location eastus
```
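
You can confirm the workspace was provisioned before moving on:

```bash
az ml workspace show --name image-descriptor-ws \
    --resource-group image-descriptor-rg \
    --query "{name:name, location:location}"
```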
|
|
|
## Step 2: Create a Compute Cluster |
|
|
|
Create a GPU-enabled compute cluster for experimentation and training jobs (the managed online endpoint in Step 6 provisions its own dedicated compute, configured in deployment.yml):
|
|
|
```bash
az ml compute create --name gpu-cluster \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg \
    --type AmlCompute \
    --min-instances 0 \
    --max-instances 1 \
    --size Standard_NC6s_v3
```
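
NC-series quota is not granted by default on new subscriptions. One way to check your current quota for the NCSv3 family before creating the cluster (the exact family name is worth verifying against the full `az vm list-usage` output):

```bash
az vm list-usage --location eastus -o table | grep -i "NCSv3"
```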
|
|
|
## Step 3: Prepare Environment Configuration |
|
|
|
Create an environment.yml file to define dependencies: |
|
|
|
```yaml
name: image_descriptor_env
channels:
  - pytorch
  - conda-forge
  - defaults
dependencies:
  - python=3.9
  - pip=23.0
  - pytorch=2.0.0
  - torchvision=0.15.0
  - pip:
    - transformers>=4.45.0  # Qwen2-VL support landed in transformers 4.45
    - accelerate>=0.25.0
    - bitsandbytes>=0.41.0
    - safetensors>=0.4.0
    - flask>=2.3.2
    - flask-cors>=4.0.0
    - gunicorn>=21.2.0
    - pillow>=10.0.0
    - matplotlib>=3.7.0
    - python-dotenv>=1.0.0
    - azureml-core>=1.48.0
    - azureml-defaults>=1.48.0
    - inference-schema>=1.4.1
```
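
If you want to validate the environment before handing it to Azure ML, you can build it locally (a sketch, assuming conda is installed):

```bash
# Build the environment locally and check the heavyweight imports resolve
conda env create -f environment.yml
conda activate image_descriptor_env
python -c "import torch, transformers; print(torch.__version__, transformers.__version__)"
```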
|
|
|
## Step 4: Create a Model Entry Script |
|
|
|
Create a file called `score.py` to handle Azure ML model inference: |
|
|
|
```python
import json
import io
import base64
import logging

import torch
from PIL import Image
from transformers import AutoProcessor, BitsAndBytesConfig, Qwen2VLForConditionalGeneration

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Globals populated by init()
model = None
processor = None


def init():
    """Initialize the model when the service starts."""
    global model, processor

    logger.info("Loading model...")
    model_id = "Qwen/Qwen2-VL-7B"

    # The processor bundles the tokenizer and the image preprocessor
    processor = AutoProcessor.from_pretrained(model_id)

    # Load with 4-bit quantization to reduce GPU memory requirements.
    # float16 compute is used because the V100 GPU in Standard_NC6s_v3
    # has no native bfloat16 support.
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        model_id,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
        ),
        device_map="auto",
    )
    logger.info("Model loaded successfully")


def _describe(image, prompt, max_new_tokens):
    """Run a single prompt against the image and return the generated text."""
    # Qwen2-VL expects the image placeholder to be inserted via the chat template
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": prompt},
        ],
    }]
    text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=[text], images=[image], return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)

    # Keep only the newly generated tokens, dropping the prompt
    generated = output[0][inputs["input_ids"].shape[1]:]
    return processor.decode(generated, skip_special_tokens=True).strip()


def run(raw_data):
    """Process an image and generate descriptions.

    Args:
        raw_data: A JSON string containing the image as base64-encoded data
            ("image_data") or a URL ("image_url")

    Returns:
        A JSON string containing the descriptions
    """
    try:
        # Parse input
        data = json.loads(raw_data)

        # Get the image bytes (from base64 or URL)
        if 'image_data' in data:
            image_bytes = base64.b64decode(data['image_data'])
            logger.info("Loaded image from base64 data")
        elif 'image_url' in data:
            # Handle image URLs (for Azure Storage or public URLs)
            from urllib.request import urlopen
            with urlopen(data['image_url']) as response:
                image_bytes = response.read()
            logger.info(f"Loaded image from URL: {data['image_url']}")
        else:
            return json.dumps({"error": "No image data or URL provided"})
        image = Image.open(io.BytesIO(image_bytes)).convert('RGB')

        # Generate the three descriptions
        basic_description = _describe(
            image, "Describe this image briefly.", max_new_tokens=150)
        detailed_description = _describe(
            image,
            "Analyze this image in detail. Describe the main elements, "
            "any text visible, the colors, and the overall composition.",
            max_new_tokens=300)
        technical_analysis = _describe(
            image,
            "What can you tell me about the technical aspects of this image?",
            max_new_tokens=200)

        # Return the results
        return json.dumps({
            "success": True,
            "basic_description": basic_description,
            "detailed_description": detailed_description,
            "technical_analysis": technical_analysis,
        })

    except Exception as e:
        logger.error(f"Error processing image: {str(e)}", exc_info=True)
        return json.dumps({"error": f"Error generating description: {str(e)}"})
```
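
Before registering anything, it can save a deployment cycle to exercise `score.py` locally (a sketch, assuming a GPU machine with the environment above activated and a sample image at `data_temp/page_2.png`):

```bash
python -c "
import base64, json, score
score.init()
with open('data_temp/page_2.png', 'rb') as f:
    payload = json.dumps({'image_data': base64.b64encode(f.read()).decode()})
print(score.run(payload))
"
```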
|
|
|
## Step 5: Register the Model |
|
|
|
1. Create a model.yml file: |
|
|
|
```yaml
$schema: https://azuremlschemas.azureedge.net/latest/model.schema.json
name: qwen-vl-image-descriptor
version: 1
description: Qwen2-VL-7B model for image description
path: .
```
|
|
|
2. Register the model: |
|
|
|
```bash
az ml model create --file model.yml \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg
```
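
Verify the registration succeeded:

```bash
az ml model show --name qwen-vl-image-descriptor --version 1 \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg
```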
|
|
|
## Step 6: Deploy as an Online Endpoint |
|
|
|
1. Create an endpoint.yml file: |
|
|
|
```yaml
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
name: image-descriptor-endpoint
description: Endpoint for image description
auth_mode: key
```
|
|
|
2. Create a deployment.yml file that wires together the model, the scoring script, and the environment:
|
|
|
```yaml
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: qwen-vl-deployment
endpoint_name: image-descriptor-endpoint
model: azureml:qwen-vl-image-descriptor:1
code_configuration:
  code: .
  scoring_script: score.py
environment:
  conda_file: environment.yml
  image: mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:latest
instance_type: Standard_NC6s_v3
instance_count: 1
request_settings:
  max_concurrent_requests_per_instance: 1
  request_timeout_ms: 120000
```
|
|
|
3. Create the endpoint: |
|
|
|
```bash
az ml online-endpoint create --file endpoint.yml \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg
```
|
|
|
4. Create the deployment: |
|
|
|
```bash
az ml online-deployment create --file deployment.yml \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg
```
|
|
|
5. Allocate 100% traffic to the deployment: |
|
|
|
```bash
az ml online-endpoint update --name image-descriptor-endpoint \
    --traffic "qwen-vl-deployment=100" \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg
```
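
Provisioning a deployment for a 7B model can take a while. If it stalls or fails, the container logs are the first place to look:

```bash
az ml online-deployment get-logs --name qwen-vl-deployment \
    --endpoint-name image-descriptor-endpoint \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg \
    --lines 100
```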
|
|
|
## Step 7: Test the Endpoint |
|
|
|
You can test the endpoint using the Azure ML Python SDK (v2):
|
|
|
```python
import json
import base64

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Get a handle to the workspace
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id="your-subscription-id",
    resource_group_name="image-descriptor-rg",
    workspace_name="image-descriptor-ws",
)

# Sanity-check that the endpoint exists
endpoint = ml_client.online_endpoints.get("image-descriptor-endpoint")
print(f"Endpoint state: {endpoint.provisioning_state}")

# Load and encode the image
with open('data_temp/page_2.png', 'rb') as f:
    image_b64 = base64.b64encode(f.read()).decode('utf-8')

# invoke() takes a path to a request file, not a raw JSON string,
# so write the payload to disk first
with open('request.json', 'w') as f:
    json.dump({'image_data': image_b64}, f)

# Invoke the endpoint
response = ml_client.online_endpoints.invoke(
    endpoint_name="image-descriptor-endpoint",
    request_file="request.json",
    deployment_name="qwen-vl-deployment",
)

# Parse and pretty-print the response
result = json.loads(response)
print(json.dumps(result, indent=2))
```
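
The same request can be sent without the SDK. A sketch using the CLI, reusing the `request.json` payload written by the script above:

```bash
az ml online-endpoint invoke --name image-descriptor-endpoint \
    --deployment-name qwen-vl-deployment \
    --request-file request.json \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg
```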
|
|
|
## Cost Optimization |
|
|
|
To optimize costs:

1. Use a smaller compute size if the model fits in its memory
2. Scale the compute cluster to zero instances when idle; managed online deployments always keep at least one instance running, so delete the deployment itself to stop its charges
3. Set up autoscaling rules (see the sketch after this list)
4. Consider reserved instances for long-term deployments
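
Managed online deployments autoscale through Azure Monitor autoscale settings attached to the deployment's resource ID. A minimal sketch; the profile name and instance counts are illustrative:

```bash
# Look up the deployment's ARM resource ID
DEPLOYMENT_ID=$(az ml online-deployment show --name qwen-vl-deployment \
    --endpoint-name image-descriptor-endpoint \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg \
    --query id -o tsv)

# Attach an autoscale profile (1 instance by default, up to 2)
az monitor autoscale create --name qwen-vl-autoscale \
    --resource-group image-descriptor-rg \
    --resource "$DEPLOYMENT_ID" \
    --min-count 1 --max-count 2 --count 1
```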
|
|
|
## Monitoring |
|
|
|
Monitor your endpoint using:

1. Azure Monitor
2. Application Insights
3. The Azure ML metrics dashboard
4. Alerts for anomalies (see the sketch after this list)
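
A sketch of a metric alert on endpoint latency; the metric name is an assumption worth checking against `az monitor metrics list-definitions` for your endpoint:

```bash
# Look up the endpoint's ARM resource ID
ENDPOINT_ID=$(az ml online-endpoint show --name image-descriptor-endpoint \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg \
    --query id -o tsv)

# Alert when average request latency exceeds 10 seconds
az monitor metrics alert create --name image-descriptor-high-latency \
    --resource-group image-descriptor-rg \
    --scopes "$ENDPOINT_ID" \
    --condition "avg RequestLatency > 10000" \
    --description "Average request latency above 10s"
```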
|
|
|
## Cleanup |
|
|
|
To avoid ongoing charges, delete resources when not in use: |
|
|
|
```bash
# Delete the endpoint
az ml online-endpoint delete --name image-descriptor-endpoint \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg -y

# Delete the compute cluster
az ml compute delete --name gpu-cluster \
    --workspace-name image-descriptor-ws \
    --resource-group image-descriptor-rg -y

# Delete the workspace (optional)
az ml workspace delete --name image-descriptor-ws \
    --resource-group image-descriptor-rg -y

# Delete the resource group (optional)
az group delete --name image-descriptor-rg -y
```