{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d071d3d0-aa2f-4582-8e43-12f22e64bbee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Environment setup: uncomment the lines below to install the notebook's dependencies.\n",
    "# NOTE: the PyTorch distribution on PyPI is published as `torch`, not `pytorch`.\n",
    "# !pip install torch\n",
    "# !pip install intel-extension-for-pytorch\n",
    "# !pip install transformers\n",
    "# !pip install sentencepiece  # backend required by T5Tokenizer\n",
    "# !pip install datasets\n",
    "# !pip install onnxruntime\n",
    "# !pip install neural_compressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d21c5cb-8042-4d63-8534-eb686acf4bf6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
    "from datasets import Dataset\n",
    "from transformers import Trainer, TrainingArguments\n",
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "import intel_extension_for_pytorch as ipex\n",
    "import json\n",
    "\n",
    "# Load pre-trained FLAN-T5 model and tokenizer\n",
    "model_name = \"google/flan-t5-large\"  # FLAN-T5 Large checkpoint\n",
    "tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
    "model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
    "# NOTE(review): ipex.optimize is called without an optimizer on a freshly loaded model;\n",
    "# IPEX documents a separate call pattern for training (model.train() plus optimizer=...).\n",
    "# Confirm this inference-style call is intended before relying on the fine-tuning results.\n",
    "optimized_model = ipex.optimize(model, dtype=torch.float32)\n",
    "\n",
    "# Load the fine-tuning examples.\n",
    "# json.load reads from an open file object, so the file is opened explicitly\n",
    "# (passing the path string directly raises AttributeError).\n",
    "with open(\"t5train.json\", encoding=\"utf-8\") as train_file:\n",
    "    data = json.load(train_file)\n",
    "\n",
    "# Convert the data to a Hugging Face dataset.\n",
    "# Dataset.from_dict expects a mapping of column name -> list of values.\n",
    "dataset = Dataset.from_dict(data)\n",
    "# NOTE(review): this DataLoader is not referenced by any later cell; the Trainer\n",
    "# builds its own loader from the dataset it is given.\n",
    "dataloader = DataLoader(dataset, num_workers=4, pin_memory=True)\n",
    "# Tokenize the data\n",
    "def preprocess_function(examples):\n",
    "    \"\"\"Tokenize source and target text into model inputs with loss-masked labels.\n",
    "\n",
    "    Args:\n",
    "        examples: A single example or a batch (as passed by ``Dataset.map``)\n",
    "            providing the 'input_text' and 'output_text' fields.\n",
    "\n",
    "    Returns:\n",
    "        The tokenizer output for 'input_text' with an added 'labels' entry\n",
    "        holding the token ids of 'output_text'. Padding positions in the\n",
    "        labels are replaced by -100 so the loss skips them.\n",
    "    \"\"\"\n",
    "    model_inputs = tokenizer(examples['input_text'], padding=\"max_length\", truncation=True, max_length=2048)\n",
    "    labels = tokenizer(examples['output_text'], padding=\"max_length\", truncation=True, max_length=2048)\n",
    "\n",
    "    # Targets are padded up to max_length; left as-is, the pad ids would be\n",
    "    # treated as real targets and dominate the loss. -100 is the label value\n",
    "    # the cross-entropy loss ignores.\n",
    "    pad_id = tokenizer.pad_token_id\n",
    "\n",
    "    def _mask_padding(token_ids):\n",
    "        return [token if token != pad_id else -100 for token in token_ids]\n",
    "\n",
    "    label_ids = labels['input_ids']\n",
    "    if label_ids and isinstance(label_ids[0], (list, tuple)):\n",
    "        # Batched call: one id sequence per example.\n",
    "        model_inputs['labels'] = [_mask_padding(sequence) for sequence in label_ids]\n",
    "    else:\n",
    "        # Single-example call: one flat id sequence.\n",
    "        model_inputs['labels'] = _mask_padding(label_ids)\n",
    "    return model_inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e0d06e8-f50a-4a22-93b7-44152f06e462",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the tokenizer over the whole dataset in batched mode\n",
    "tokenized_datasets = dataset.map(preprocess_function, batched=True)\n",
    "\n",
    "# Hyper-parameters for the CPU-only fine-tuning run\n",
    "training_config = dict(\n",
    "    output_dir=\"./flan_t5_results\",  # checkpoints are written here\n",
    "    eval_strategy=\"epoch\",           # evaluate once per epoch\n",
    "    num_train_epochs=1,\n",
    "    per_device_train_batch_size=1,\n",
    "    learning_rate=2e-5,\n",
    "    weight_decay=0.01,               # regularization strength\n",
    "    save_steps=10,                   # checkpoint every 10 steps\n",
    "    save_total_limit=1,              # keep only the most recent checkpoint\n",
    "    fp16=False,                      # no mixed precision\n",
    "    use_cpu=True,                    # never place the model on an accelerator\n",
    ")\n",
    "training_args = TrainingArguments(**training_config)\n",
    "\n",
    "# Build the Trainer; evaluation deliberately reuses the training split\n",
    "# because no separate evaluation data is available.\n",
    "trainer = Trainer(\n",
    "    args=training_args,\n",
    "    model=optimized_model,\n",
    "    eval_dataset=tokenized_datasets,\n",
    "    train_dataset=tokenized_datasets,\n",
    ")\n",
    "\n",
    "# Fine-tune the model on the tokenized examples\n",
    "trainer.train()\n",
    "\n",
    "# Persist the fine-tuned weights together with the tokenizer files\n",
    "finetuned_dir = \"./flan_t5_finetuned\"\n",
    "optimized_model.save_pretrained(finetuned_dir)\n",
    "tokenizer.save_pretrained(finetuned_dir)\n",
    "\n",
    "# Switch to inference mode; as the cell's last expression the module is also displayed\n",
    "optimized_model.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4b97afe-f09a-4bee-9139-ed9802da712e",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
    "from neural_compressor.quantization import fit\n",
    "from neural_compressor.config import PostTrainingQuantConfig\n",
    "\n",
    "# Load the fine-tuned FP32 model and its tokenizer\n",
    "model_path = \"./flan_t5_finetuned\"\n",
    "optimized_model = T5ForConditionalGeneration.from_pretrained(model_path)\n",
    "tokenizer = T5Tokenizer.from_pretrained(model_path)\n",
    "\n",
    "# Define the quantization configuration\n",
    "quant_config = PostTrainingQuantConfig(approach='dynamic')  # Dynamic quantization\n",
    "\n",
    "# Quantize the model\n",
    "q_model = fit(model=optimized_model, conf=quant_config)\n",
    "if q_model is None:\n",
    "    # Fail with a clear message instead of an AttributeError on None below.\n",
    "    raise RuntimeError(\"Neural Compressor did not return a quantized model\")\n",
    "\n",
    "# Save the quantized model.\n",
    "# fit() returns a Neural Compressor model wrapper, not a transformers PreTrainedModel,\n",
    "# so it is persisted through the wrapper's save() method rather than save_pretrained().\n",
    "# NOTE(review): the directory name says fp16, but the config above requests dynamic\n",
    "# post-training quantization - confirm the intended naming.\n",
    "quantized_model_path = \"./flan_t5_quantized_fp16\"\n",
    "q_model.save(quantized_model_path)\n",
    "tokenizer.save_pretrained(quantized_model_path)\n",
    "\n",
    "print(f\"Quantized model saved at: {quantized_model_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a152f3d9-7042-479b-b3ba-ff5c957be518",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
    "import os\n",
    "\n",
    "# Load the fine-tuned model saved by the training cell.\n",
    "# (No cell in this notebook writes a \"./flan_t5_fp16\" directory, so loading from it\n",
    "# fails on a fresh run.)\n",
    "model_path = \"./flan_t5_finetuned\"\n",
    "model = T5ForConditionalGeneration.from_pretrained(model_path)\n",
    "tokenizer = T5Tokenizer.from_pretrained(model_path)\n",
    "\n",
    "# Set the model to evaluation mode\n",
    "model.eval()\n",
    "\n",
    "# Example input text used to trace the model during export\n",
    "input_text = \"Translate English to French: How are you?\"\n",
    "inputs = tokenizer(input_text, return_tensors=\"pt\", padding=True, truncation=True)\n",
    "\n",
    "# Prepare decoder input: <pad> token is used as the first decoder input\n",
    "decoder_start_token_id = tokenizer.pad_token_id\n",
    "decoder_input_ids = torch.tensor([[decoder_start_token_id]])\n",
    "\n",
    "# Create output directory if it doesn't exist\n",
    "onnx_output_dir = \"./flant5\"\n",
    "os.makedirs(onnx_output_dir, exist_ok=True)\n",
    "\n",
    "# Define the path for the ONNX model\n",
    "onnx_model_path = os.path.join(onnx_output_dir, \"flan_t5_fp16.onnx\")\n",
    "\n",
    "# Export the model to ONNX.\n",
    "# The encoder and decoder sequence axes get distinct symbolic names: a shared name\n",
    "# would declare the two lengths equal, which they are not (the decoder starts with\n",
    "# a single token while the encoder sees the whole prompt).\n",
    "torch.onnx.export(\n",
    "    model,                                    # Model to be converted\n",
    "    (inputs[\"input_ids\"], inputs[\"attention_mask\"], decoder_input_ids),  # Positional forward() inputs\n",
    "    onnx_model_path,                         # Path to save the ONNX model\n",
    "    export_params=True,                      # Store the trained parameters\n",
    "    opset_version=13,                        # ONNX opset version\n",
    "    do_constant_folding=True,                # Optimize constants\n",
    "    input_names=[\"input_ids\", \"attention_mask\", \"decoder_input_ids\"],  # Input tensor names\n",
    "    output_names=[\"output\"],                 # Name of the first output\n",
    "    dynamic_axes={                           # Dynamic shapes for batching and variable lengths\n",
    "        \"input_ids\": {0: \"batch_size\", 1: \"sequence_length\"},\n",
    "        \"attention_mask\": {0: \"batch_size\", 1: \"sequence_length\"},\n",
    "        \"decoder_input_ids\": {0: \"batch_size\", 1: \"decoder_sequence_length\"},\n",
    "        \"output\": {0: \"batch_size\", 1: \"decoder_sequence_length\"}\n",
    "    }\n",
    ")\n",
    "\n",
    "print(f\"ONNX model saved at: {onnx_model_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "055abefb-2d0f-4819-b859-86b77270c0be",
   "metadata": {},
   "outputs": [],
   "source": [
    "import onnxruntime as ort\n",
    "import numpy as np\n",
    "from transformers import T5Tokenizer\n",
    "\n",
    "# Load the ONNX model and tokenizer.\n",
    "# The export cell writes the graph to ./flant5/flan_t5_fp16.onnx, and the tokenizer\n",
    "# files are saved next to the fine-tuned model in ./flan_t5_finetuned.\n",
    "onnx_model_path = \"./flant5/flan_t5_fp16.onnx\"\n",
    "tokenizer = T5Tokenizer.from_pretrained(\"./flan_t5_finetuned\")\n",
    "ort_session = ort.InferenceSession(onnx_model_path)\n",
    "# Only the first graph output is needed for decoding; fetching it by name\n",
    "# avoids copying any additional outputs back on every step.\n",
    "logits_name = ort_session.get_outputs()[0].name\n",
    "\n",
    "# Input text for the model\n",
    "input_text = \"Translate English to French: How are you?\"\n",
    "inputs = tokenizer(input_text, return_tensors=\"np\", padding=True, truncation=True)\n",
    "\n",
    "# Ensure inputs are int64 numpy arrays\n",
    "input_ids = np.array(inputs[\"input_ids\"], dtype=np.int64)\n",
    "attention_mask = np.array(inputs[\"attention_mask\"], dtype=np.int64)\n",
    "\n",
    "# Prepare the decoder input (<pad> token for initial input to the decoder)\n",
    "decoder_start_token_id = tokenizer.pad_token_id\n",
    "decoder_input_ids = np.array([[decoder_start_token_id]], dtype=np.int64)\n",
    "\n",
    "# Greedy decoding: a single forward pass only predicts the first token, so the\n",
    "# model is run repeatedly, appending the most likely next token each time until\n",
    "# end-of-sequence or the step budget is reached. The whole graph (encoder included)\n",
    "# is re-run per step, which is simple but not efficient.\n",
    "max_new_tokens = 64\n",
    "for _ in range(max_new_tokens):\n",
    "    # ONNX model inputs\n",
    "    onnx_inputs = {\n",
    "        \"input_ids\": input_ids,\n",
    "        \"attention_mask\": attention_mask,\n",
    "        \"decoder_input_ids\": decoder_input_ids\n",
    "    }\n",
    "\n",
    "    # Run the ONNX model\n",
    "    onnx_outputs = ort_session.run([logits_name], onnx_inputs)\n",
    "\n",
    "    # Pick the highest-scoring token at the last decoder position\n",
    "    logits = onnx_outputs[0]  # Indexed below as [batch, decoder position, vocabulary]\n",
    "    next_token_id = int(np.argmax(logits[0, -1]))\n",
    "    decoder_input_ids = np.concatenate(\n",
    "        [decoder_input_ids, np.array([[next_token_id]], dtype=np.int64)], axis=1\n",
    "    )\n",
    "    if next_token_id == tokenizer.eos_token_id:\n",
    "        break\n",
    "\n",
    "# Drop the decoder start token; the rest are the generated token IDs\n",
    "token_ids = decoder_input_ids[:, 1:]\n",
    "\n",
    "# Decode the token IDs into text\n",
    "decoded_output = tokenizer.decode(token_ids[0], skip_special_tokens=True)\n",
    "\n",
    "print(f\"ONNX Model Output: {decoded_output}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a9110235-9c49-46ef-86e1-f446b3f12d67",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}