Upload 2 files
- distilbert_finetuing.ipynb +18 -140
- t5_training.ipynb +11 -14
distilbert_finetuing.ipynb
CHANGED
@@ -298,7 +298,8 @@
  "source": [
  "from transformers import DistilBertTokenizer\n",
  "import torch\n",
- "\n",
+ "from torch.utils.data import DataLoader\n",
+ "import intel_extension_for_pytorch as ipex\n",
  "# Load the DistilBERT tokenizer\n",
  "tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')\n",
  "\n",
@@ -366,7 +367,8 @@
  "from transformers import DistilBertForSequenceClassification\n",
  "\n",
  "# Load the model with a classification head\n",
- "model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=6) # 6 classes: 0 to 5\n"
+ "model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=6) # 6 classes: 0 to 5\n",
+ "optimized_model = ipex.optimize(model, dtype=torch.float32)"
  ]
 },
 {
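Note: the two added lines above are the heart of this commit: the model is passed through Intel Extension for PyTorch (IPEX) before training. A minimal sketch of the pattern, assuming CPU execution; for training, ipex.optimize also accepts the optimizer so both are prepared together (the notebook optimizes the model alone):

import torch
import intel_extension_for_pytorch as ipex
from torch.optim import AdamW
from transformers import DistilBertForSequenceClassification

# Same classification head as the notebook: 6 labels.
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased', num_labels=6)
optimizer = AdamW(model.parameters(), lr=1e-4)

# ipex.optimize should see the model in training mode when an optimizer is
# given; it returns the pair prepared for CPU execution.
model.train()
model, optimizer = ipex.optimize(model, dtype=torch.float32, optimizer=optimizer)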
@@ -424,12 +426,12 @@
  "from torch.optim.lr_scheduler import StepLR\n",
  "\n",
  "# Set up the optimizer\n",
- "optimizer = AdamW(
+ "optimizer = AdamW(optimized_model.parameters(), lr=0.0001)\n",
  "\n",
  "# Define the training loop\n",
  "epochs = 1\n",
  "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
- "
+ "optimized_model.to(device)\n",
  "\n",
  "print(device)"
  ]
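Note: the removed optimizer line is truncated in this view; the replacement builds AdamW over optimized_model.parameters() with lr=0.0001. One caveat worth flagging: on a CUDA machine the added optimized_model.to(device) would move the IPEX-prepared model to the GPU, which defeats the CPU-side optimization; in this Space the device presumably resolves to cpu. A sketch of the setup, assuming AdamW comes from torch.optim (transformers' AdamW is deprecated) and an illustrative StepLR schedule, since the hunk only shows the import:

from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR

optimizer = AdamW(optimized_model.parameters(), lr=1e-4)
scheduler = StepLR(optimizer, step_size=1, gamma=0.9)  # illustrative values; not in the diff

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
optimized_model.to(device)  # a no-op on CPU-only hosts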
@@ -800,7 +802,7 @@
  ],
  "source": [
  "for epoch in range(epochs):\n",
- "
+ " optimized_model.train()\n",
  " total_loss = 0\n",
  " for batch in train_dataloader:\n",
  " input_ids, labels = batch\n",
@@ -810,7 +812,7 @@
  " optimizer.zero_grad()\n",
  "\n",
  " # Forward pass\n",
- " outputs =
+ " outputs = optimized_model(input_ids, labels=labels)\n",
  " loss = outputs.loss\n",
  " total_loss += loss.item()\n",
  "\n",
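Note: the restored forward pass feeds only input_ids and labels, so no attention_mask reaches the model and padding tokens are attended to. A minimal sketch of one epoch consistent with these hunks, assuming train_dataloader yields (input_ids, labels) tensor pairs:

for epoch in range(epochs):
    optimized_model.train()
    total_loss = 0
    for input_ids, labels in train_dataloader:
        input_ids, labels = input_ids.to(device), labels.to(device)
        optimizer.zero_grad()
        # Hugging Face models compute cross-entropy internally when labels are passed.
        outputs = optimized_model(input_ids, labels=labels)
        outputs.loss.backward()
        optimizer.step()
        total_loss += outputs.loss.item()
    print(f"epoch {epoch}: mean loss {total_loss / len(train_dataloader):.4f}")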
@@ -835,7 +837,7 @@
  }
  ],
  "source": [
- "
+ "optimized_model.eval()\n",
  "correct_predictions = 0\n",
  "total_predictions = 0\n",
  "\n",
@@ -844,7 +846,7 @@
  " input_ids, labels = batch\n",
  " input_ids, labels = input_ids.to(device), labels.to(device)\n",
  " # Forward pass\n",
- " outputs =
+ " outputs = optimized_model(input_ids)\n",
  " predictions = torch.argmax(outputs.logits, dim=-1)\n",
  "\n",
  " correct_predictions += (predictions == labels).sum().item()\n",
@@ -872,9 +874,9 @@
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
  " input_ids = inputs['input_ids'].to(device)\n",
  " \n",
- "
+ " optimized_model.eval()\n",
  " with torch.no_grad():\n",
- " outputs =
+ " outputs = optimized_model(input_ids)\n",
  " prediction = torch.argmax(outputs.logits, dim=-1)\n",
  " return prediction.item()\n",
  "\n",
@@ -915,10 +917,10 @@
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
  " input_ids = inputs['input_ids'].to(device)\n",
  " \n",
- "
+ " optimized_model.eval()\n",
  " with torch.no_grad():\n",
  " # Get the raw logits from the model\n",
- " outputs =
+ " outputs = optimized_model(input_ids)\n",
  " logits = outputs.logits\n",
  " \n",
  " # Apply softmax to get probabilities\n",
@@ -961,7 +963,7 @@
  }
  ],
  "source": [
- "
+ "optimized_model.save_pretrained('./fine_tuned_distilbert')\n",
  "\n",
  "# Save the tokenizer\n",
  "tokenizer.save_pretrained('./fine_tuned_distilbert')"
@@ -976,7 +978,7 @@
  "from transformers import DistilBertForSequenceClassification, DistilBertTokenizer\n",
  "\n",
  "# Load the saved model\n",
- "
+ "optimized_model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')\n",
  "\n",
  "# Load the saved tokenizer\n",
  "tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')\n"
@@ -1007,9 +1009,9 @@
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
  " input_ids = inputs['input_ids'].to(device)\n",
  "\n",
- "
+ " optimized_model.eval()\n",
  " with torch.no_grad():\n",
- " outputs =
+ " outputs = optimized_model(input_ids)\n",
  " logits = outputs.logits\n",
  " probabilities = softmax(logits, dim=-1)\n",
  " \n",
@@ -1028,130 +1030,6 @@
  " print(f\"{class_label}: {prob:.4f}\")"
  ]
 },
- {
- "cell_type": "code",
- "execution_count": 55,
- "metadata": {},
- "outputs": [],
- "source": [
- "e = ['@ What are the key differences between classification and regression tasks in supervised learning, and how do you determine which algorithm to use for a specific problem?',\n",
- " '@ How does clustering differ from dimensionality reduction, and can you provide real-world examples of where each is applied?',\n",
- " '@ What are common evaluation metrics for classification models, and how do precision, recall, and F1-score relate to each other?',\n",
- " '@ How do convolutional neural networks (CNNs) and recurrent neural networks (RNNs) differ in their architecture and applications?',\n",
- " '@ What steps can be taken to identify and mitigate bias in machine learning models, and why is this an important consideration?']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 56,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277}\n",
- "{'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824}\n",
- "{'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678}\n",
- "{'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526}\n",
- "{'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n"
- ]
- }
- ],
- "source": [
- "for i in e:\n",
- " class_probabilities = predict_with_loaded_model(i)\n",
- " print(class_probabilities)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 67,
- "metadata": {},
- "outputs": [],
- "source": [
- "weights = {\n",
- " 'Remembering': 0.5,\n",
- " 'Understanding': 0.5,\n",
- " 'Applying': 0.5,\n",
- " 'Analyzing': 0.5,\n",
- " 'Evaluating': 0.5,\n",
- " 'Creating':0.5,\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 68,
- "metadata": {},
- "outputs": [],
- "source": [
- "questions = [\n",
- " {'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277},\n",
- " {'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824},\n",
- " {'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678},\n",
- " {'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526},\n",
- " {'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 69,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2.49999998975 18.0 90.0\n",
- "Normalized Score of the Paper: 0.0278\n"
- ]
- }
- ],
- "source": [
- "def calculate_score(question, weights):\n",
- " score = sum(question[level] * weight for level, weight in weights.items())\n",
- " return score\n",
- "\n",
- "total_score = sum(calculate_score(q, weights) for q in questions)\n",
- "max_score_per_question = sum([weights[level] for level in weights]) * 6 \n",
- "max_total_score = max_score_per_question * len(questions) \n",
- "normalized_score = (total_score - 0) / (max_total_score - 0)\n",
- "print(total_score, max_score_per_question, max_total_score)\n",
- "print(f\"Normalized Score of the Paper: {normalized_score:.4f}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 70,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277}\n",
- "{'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824}\n",
- "{'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678}\n",
- "{'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526}\n",
- "{'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n"
- ]
- }
- ],
- "source": [
- "for i in e:\n",
- " class_probabilities = predict_with_loaded_model(i)\n",
- " print(class_probabilities)"
- ]
- },
 {
 "cell_type": "code",
 "execution_count": null,
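Note: the deleted cells scored five generated questions against uniform Bloom's-taxonomy weights, and the printed numbers check out arithmetically. Each question's six probabilities sum to 1 and every weight is 0.5, so calculate_score returns 0.5 per question:

total_score = 5 x 0.5 = 2.5 (the printed 2.49999998975, up to float32 rounding)
max_total_score = (0.5 x 6) x 6 x 5 = 90, giving 2.5 / 90 = 0.0278

Since probabilities sum to 1, a question can never score more than 0.5 under these weights, so the attainable maximum for five questions is 2.5, not 90; the extra factor of 6 in max_score_per_question double-counts the number of classes, and dividing by 2.5 would map this set to a normalized score of 1.0.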
t5_training.ipynb
CHANGED
@@ -25,19 +25,22 @@
  "from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
  "from datasets import Dataset\n",
  "from transformers import Trainer, TrainingArguments\n",
+ "import torch\n",
+ "from torch.utils.data import DataLoader\n",
+ "import intel_extension_for_pytorch as ipex\n",
  "import json\n",
  "\n",
  "# Load pre-trained FLAN-T5 model and tokenizer\n",
  "model_name = \"google/flan-t5-large\" # FLAN-T5 Base Model\n",
  "tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
  "model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
- "\n",
+ "optimized_model = ipex.optimize(model, dtype=torch.float32)\n",
  "# Example input-output pair for fine-tuning\n",
- "data = json.load(
+ "data = json.load(\"t5train.json\")\n",
  "\n",
  "# Convert the data to a Hugging Face dataset\n",
  "dataset = Dataset.from_dict(data)\n",
- "\n",
+ "dataloader = DataLoader(dataset, num_workers=4, pin_memory=True)\n",
  "# Tokenize the data\n",
  "def preprocess_function(examples):\n",
  " model_inputs = tokenizer(examples['input_text'], padding=\"max_length\", truncation=True, max_length=2048)\n",
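Note: the added data-loading line passes a filename string to json.load, which expects a file object, so this cell raises a TypeError as written; the hand-built DataLoader is also never consumed, since Trainer constructs its own loaders. A corrected sketch, assuming t5train.json holds a column-oriented dict (schema assumed):

import json
from datasets import Dataset

# json.load needs an open file handle, not a path string.
with open("t5train.json") as f:
    data = json.load(f)  # e.g. {"input_text": [...], "target_text": [...]}

dataset = Dataset.from_dict(data)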
@@ -71,7 +74,7 @@
  "\n",
  "# Initialize the Trainer class\n",
  "trainer = Trainer(\n",
- " model=
+ " model=optimized_model,\n",
  " args=training_args,\n",
  " train_dataset=tokenized_datasets,\n",
  " eval_dataset=tokenized_datasets # Use the same dataset for evaluation since we only have one data point\n",
@@ -82,17 +85,11 @@
  "\n",
  "# Save the fine-tuned model\n",
  "#trainer.save_model(\"./flan_t5_finetuned\")\n",
- "
+ "optimized_model.save_pretrained(\"./flan_t5_finetuned\")\n",
  "tokenizer.save_pretrained(\"./flan_t5_finetuned\")\n",
  "\n",
  "# Evaluate the model on the training data (for a single example)\n",
- "
- "inputs = tokenizer(\"What are the key differences between classification and regression tasks in supervised learning, and how do you determine which algorithm to use for a specific problem? e How does clustering differ from dimensionality reduction, and can you provide real-world examples of where each is applied?\", return_tensors=\"pt\", padding=True)\n",
- "outputs = model.generate(inputs['input_ids'], max_length=1024)\n",
- "\n",
- "# Decode the generated output\n",
- "generated_output = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
- "print(generated_output)"
+ "optimized_model.eval()"
  ]
 },
 {
@@ -110,14 +107,14 @@
  "\n",
  "# Load your FP32 model\n",
  "model_path = \"./flan_t5_finetuned\"\n",
- "
+ "optimized_model = T5ForConditionalGeneration.from_pretrained(model_path)\n",
  "tokenizer = T5Tokenizer.from_pretrained(model_path)\n",
  "\n",
  "# Define the quantization configuration\n",
  "quant_config = PostTrainingQuantConfig(approach='dynamic') # Dynamic quantization\n",
  "\n",
  "# Quantize the model\n",
- "q_model = fit(model=
+ "q_model = fit(model=optimized_model, conf=quant_config)\n",
  "\n",
  "# Save the quantized model\n",
  "quantized_model_path = \"./flan_t5_quantized_fp16\"\n",
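Note: the final hunk quantizes the fine-tuned model with Intel Neural Compressor's post-training dynamic quantization. A minimal end-to-end sketch, assuming the INC 2.x API (PostTrainingQuantConfig under neural_compressor.config, fit under neural_compressor.quantization); dynamic quantization produces INT8 weights, so the flan_t5_quantized_fp16 directory name is misleading:

from transformers import T5ForConditionalGeneration
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.quantization import fit

model = T5ForConditionalGeneration.from_pretrained("./flan_t5_finetuned")

# Dynamic quantization needs no calibration dataset; weights are quantized
# ahead of time and activations on the fly.
quant_config = PostTrainingQuantConfig(approach="dynamic")
q_model = fit(model=model, conf=quant_config)

q_model.save("./flan_t5_quantized_int8")  # assumed INC save method; writes weights + config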