{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "nb-overview",
   "metadata": {},
   "source": [
    "# Fine-tune, quantize and export FLAN-T5\n",
    "\n",
    "Pipeline, in order:\n",
    "\n",
    "1. Fine-tune `google/flan-t5-large` on CPU with the examples in `t5train.json`.\n",
    "2. Apply dynamic post-training quantization with Intel Neural Compressor.\n",
    "3. Export the fine-tuned model to ONNX.\n",
    "4. Run greedy decoding on the exported graph with ONNX Runtime.\n",
    "\n",
    "Stages 2-4 reload their inputs from disk, so each can be re-run on its own once stage 1 has written `./flan_t5_finetuned`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d071d3d0-aa2f-4582-8e43-12f22e64bbee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uncomment to install the dependencies into the kernel's environment.\n",
    "# %pip install torch\n",
    "# %pip install intel-extension-for-pytorch\n",
    "# %pip install transformers\n",
    "# %pip install datasets\n",
    "# %pip install onnxruntime\n",
    "# %pip install neural_compressor"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "nb-finetune",
   "metadata": {},
   "source": [
    "## 1. Load the model and data, then fine-tune"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d21c5cb-8042-4d63-8534-eb686acf4bf6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
    "from datasets import Dataset\n",
    "from transformers import Trainer, TrainingArguments\n",
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "import intel_extension_for_pytorch as ipex\n",
    "import json\n",
    "\n",
    "# Load the pre-trained FLAN-T5 model and tokenizer\n",
    "model_name = \"google/flan-t5-large\"  # FLAN-T5 Large checkpoint\n",
    "tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
    "model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
    "# NOTE(review): ipex.optimize is called here without an optimizer and before\n",
    "# training starts; confirm against the IPEX documentation that this is the\n",
    "# intended usage for a model that is subsequently trained.\n",
    "optimized_model = ipex.optimize(model, dtype=torch.float32)\n",
    "\n",
    "# Load the fine-tuning examples.\n",
    "# json.load() needs an open file object, not a path string.\n",
    "with open(\"t5train.json\", encoding=\"utf-8\") as train_file:\n",
    "    data = json.load(train_file)\n",
    "\n",
    "# Convert the data to a Hugging Face dataset.\n",
    "# Presumably `data` maps 'input_text' / 'output_text' to equal-length lists of\n",
    "# strings -- verify against t5train.json.\n",
    "dataset = Dataset.from_dict(data)\n",
    "\n",
    "MAX_LENGTH = 2048    # Token budget per input / target sequence (pad and truncate to this)\n",
    "IGNORE_INDEX = -100  # Label value that the T5 cross-entropy loss skips\n",
    "\n",
    "\n",
    "def preprocess_function(examples):\n",
    "    \"\"\"Tokenize a batch of examples for sequence-to-sequence fine-tuning.\n",
    "\n",
    "    Args:\n",
    "        examples: Batch mapping with 'input_text' and 'output_text' entries\n",
    "            (lists of strings when called via ``dataset.map(batched=True)``).\n",
    "\n",
    "    Returns:\n",
    "        The tokenizer output for the inputs with an added 'labels' entry that\n",
    "        holds the target token ids. Padding positions in the labels are set\n",
    "        to IGNORE_INDEX so they do not contribute to the loss.\n",
    "    \"\"\"\n",
    "    model_inputs = tokenizer(examples['input_text'], padding=\"max_length\", truncation=True, max_length=MAX_LENGTH)\n",
    "    labels = tokenizer(examples['output_text'], padding=\"max_length\", truncation=True, max_length=MAX_LENGTH)\n",
    "    pad_token_id = tokenizer.pad_token_id\n",
    "    # Without this masking the loss would also score the padding tokens.\n",
    "    model_inputs['labels'] = [\n",
    "        [token_id if token_id != pad_token_id else IGNORE_INDEX for token_id in label_ids]\n",
    "        for label_ids in labels['input_ids']\n",
    "    ]\n",
    "    return model_inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e0d06e8-f50a-4a22-93b7-44152f06e462",
   "metadata": {},
   "outputs": [],
   "source": [
    "tokenized_datasets = dataset.map(preprocess_function, batched=True)\n",
    "\n",
    "# Set up the training arguments\n",
    "training_args = TrainingArguments(\n",
    "    output_dir=\"./flan_t5_results\",   # Output directory for model checkpoints\n",
    "    eval_strategy=\"epoch\",            # Evaluation strategy to use\n",
    "    learning_rate=2e-5,               # Learning rate for fine-tuning\n",
    "    per_device_train_batch_size=1,    # Batch size for training\n",
    "    num_train_epochs=1,               # Number of epochs\n",
    "    weight_decay=0.01,                # Weight decay for regularization\n",
    "    save_steps=10,                    # Save model every 10 steps\n",
    "    save_total_limit=1,               # Limit the number of saved models\n",
    "    fp16=False,                       # Disable mixed precision\n",
    "    use_cpu=True                      # Force CPU-only training\n",
    ")\n",
    "\n",
    "# Initialize the Trainer class\n",
    "trainer = Trainer(\n",
    "    model=optimized_model,\n",
    "    args=training_args,\n",
    "    train_dataset=tokenized_datasets,\n",
    "    eval_dataset=tokenized_datasets  # Use the same dataset for evaluation since we only have one data point\n",
    ")\n",
    "\n",
    "# Start training (this fine-tunes the model on the loaded examples)\n",
    "trainer.train()\n",
    "\n",
    "# Save the fine-tuned model and its tokenizer side by side\n",
    "optimized_model.save_pretrained(\"./flan_t5_finetuned\")\n",
    "tokenizer.save_pretrained(\"./flan_t5_finetuned\")\n",
    "\n",
    "# Switch the model to evaluation mode; the trailing ';' hides the module repr\n",
    "optimized_model.eval();"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "nb-quantize",
   "metadata": {},
   "source": [
    "## 2. Dynamic post-training quantization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4b97afe-f09a-4bee-9139-ed9802da712e",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
    "from neural_compressor.quantization import fit\n",
    "from neural_compressor.config import PostTrainingQuantConfig\n",
    "\n",
    "# Load the fine-tuned FP32 model\n",
    "model_path = \"./flan_t5_finetuned\"\n",
    "fp32_model = T5ForConditionalGeneration.from_pretrained(model_path)\n",
    "tokenizer = T5Tokenizer.from_pretrained(model_path)\n",
    "\n",
    "# Define the quantization configuration\n",
    "quant_config = PostTrainingQuantConfig(approach='dynamic')  # Dynamic quantization\n",
    "\n",
    "# Quantize the model\n",
    "q_model = fit(model=fp32_model, conf=quant_config)\n",
    "\n",
    "# Save the quantized model.\n",
    "# fit() returns a Neural Compressor model wrapper, which is persisted with\n",
    "# save() rather than the Hugging Face save_pretrained() method.\n",
    "# NOTE(review): the directory name says 'fp16', but what is applied above is\n",
    "# dynamic post-training quantization; the name is kept for compatibility.\n",
    "quantized_model_path = \"./flan_t5_quantized_fp16\"\n",
    "q_model.save(quantized_model_path)\n",
    "tokenizer.save_pretrained(quantized_model_path)\n",
    "\n",
    "print(f\"Quantized model saved at: {quantized_model_path}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "nb-export",
   "metadata": {},
   "source": [
    "## 3. Export the fine-tuned model to ONNX"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a152f3d9-7042-479b-b3ba-ff5c957be518",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
    "import os\n",
    "\n",
    "# Load the fine-tuned model written by the training step.\n",
    "# (No cell in this notebook produces a separate FP16 checkpoint.)\n",
    "model_path = \"./flan_t5_finetuned\"\n",
    "model = T5ForConditionalGeneration.from_pretrained(model_path)\n",
    "tokenizer = T5Tokenizer.from_pretrained(model_path)\n",
    "\n",
    "# Set the model to evaluation mode\n",
    "model.eval()\n",
    "# Only the logits are consumed by the inference step below, so do not export\n",
    "# the decoder key/value cache as additional graph outputs.\n",
    "model.config.use_cache = False\n",
    "\n",
    "# Example input text\n",
    "input_text = \"Translate English to French: How are you?\"\n",
    "inputs = tokenizer(input_text, return_tensors=\"pt\", padding=True, truncation=True)\n",
    "\n",
    "# Prepare decoder input: the pad token is used as the first decoder input\n",
    "decoder_start_token_id = tokenizer.pad_token_id\n",
    "decoder_input_ids = torch.tensor([[decoder_start_token_id]])\n",
    "\n",
    "# Create output directory if it doesn't exist\n",
    "onnx_output_dir = \"./flant5\"\n",
    "os.makedirs(onnx_output_dir, exist_ok=True)\n",
    "\n",
    "# Define the path for the ONNX model.\n",
    "# NOTE(review): the file name says 'fp16', but no half-precision conversion is\n",
    "# performed in this notebook; the name is kept so existing consumers still match.\n",
    "onnx_model_path = os.path.join(onnx_output_dir, \"flan_t5_fp16.onnx\")\n",
    "\n",
    "# Export the model to ONNX\n",
    "torch.onnx.export(\n",
    "    model,                                      # Model to be converted\n",
    "    (inputs[\"input_ids\"], inputs[\"attention_mask\"], decoder_input_ids),  # Input tuple\n",
    "    onnx_model_path,                            # Path to save the ONNX model\n",
    "    export_params=True,                         # Store the trained parameters\n",
    "    opset_version=13,                           # ONNX opset version\n",
    "    do_constant_folding=True,                   # Optimize constants\n",
    "    input_names=[\"input_ids\", \"attention_mask\", \"decoder_input_ids\"],  # Input tensor names\n",
    "    output_names=[\"output\"],                    # Output tensor name\n",
    "    dynamic_axes={                              # Dynamic shapes for batching\n",
    "        \"input_ids\": {0: \"batch_size\", 1: \"sequence_length\"},\n",
    "        \"attention_mask\": {0: \"batch_size\", 1: \"sequence_length\"},\n",
    "        # The decoder length varies independently of the encoder length,\n",
    "        # so it gets its own symbolic axis name.\n",
    "        \"decoder_input_ids\": {0: \"batch_size\", 1: \"decoder_sequence_length\"},\n",
    "        \"output\": {0: \"batch_size\", 1: \"decoder_sequence_length\"}\n",
    "    }\n",
    ")\n",
    "\n",
    "print(f\"ONNX model saved at: {onnx_model_path}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "nb-onnx-inference",
   "metadata": {},
   "source": [
    "## 4. Greedy decoding with ONNX Runtime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "055abefb-2d0f-4819-b859-86b77270c0be",
   "metadata": {},
   "outputs": [],
   "source": [
    "import onnxruntime as ort\n",
    "import numpy as np\n",
    "from transformers import T5Tokenizer\n",
    "\n",
    "# Load the ONNX model written by the export step and the fine-tuned tokenizer\n",
    "onnx_model_path = \"./flant5/flan_t5_fp16.onnx\"\n",
    "tokenizer = T5Tokenizer.from_pretrained(\"./flan_t5_finetuned\")\n",
    "ort_session = ort.InferenceSession(onnx_model_path)\n",
    "\n",
    "# Input text for the model\n",
    "input_text = \"Translate English to French: How are you?\"\n",
    "inputs = tokenizer(input_text, return_tensors=\"np\", padding=True, truncation=True)\n",
    "\n",
    "# Ensure inputs are int64 numpy arrays\n",
    "input_ids = np.array(inputs[\"input_ids\"], dtype=np.int64)\n",
    "attention_mask = np.array(inputs[\"attention_mask\"], dtype=np.int64)\n",
    "\n",
    "# Prepare the decoder input (the pad token is the initial input to the decoder)\n",
    "decoder_start_token_id = tokenizer.pad_token_id\n",
    "decoder_input_ids = np.array([[decoder_start_token_id]], dtype=np.int64)\n",
    "\n",
    "MAX_NEW_TOKENS = 64  # Upper bound on the number of generated tokens\n",
    "\n",
    "# Greedy decoding: a single forward pass only predicts the next token, so the\n",
    "# graph is re-run with the growing decoder sequence until EOS or the token limit.\n",
    "for _ in range(MAX_NEW_TOKENS):\n",
    "    onnx_inputs = {\n",
    "        \"input_ids\": input_ids,\n",
    "        \"attention_mask\": attention_mask,\n",
    "        \"decoder_input_ids\": decoder_input_ids\n",
    "    }\n",
    "    onnx_outputs = ort_session.run(None, onnx_inputs)\n",
    "\n",
    "    logits = onnx_outputs[0]  # Shape: [batch_size, sequence_length, vocab_size]\n",
    "    next_token_id = int(np.argmax(logits[0, -1]))  # Highest-scoring token at the last position\n",
    "    if next_token_id == tokenizer.eos_token_id:\n",
    "        break\n",
    "    decoder_input_ids = np.concatenate(\n",
    "        [decoder_input_ids, np.array([[next_token_id]], dtype=np.int64)],\n",
    "        axis=1\n",
    "    )\n",
    "\n",
    "# Drop the decoder start token, then decode the generated token IDs into text\n",
    "token_ids = decoder_input_ids[:, 1:]\n",
    "decoded_output = tokenizer.decode(token_ids[0], skip_special_tokens=True)\n",
    "\n",
    "print(f\"ONNX Model Output: {decoded_output}\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}