AruniAnkur committed on
Commit 05b526e · verified · 1 Parent(s): 6bb6f62

Upload 2 files

Files changed (2)
  1. distilbert_finetuing.ipynb +18 -140
  2. t5_training.ipynb +11 -14
distilbert_finetuing.ipynb CHANGED
@@ -298,7 +298,8 @@
298
  "source": [
299
  "from transformers import DistilBertTokenizer\n",
300
  "import torch\n",
301
- "\n",
 
302
  "# Load the DistilBERT tokenizer\n",
303
  "tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')\n",
304
  "\n",
@@ -366,7 +367,8 @@
366
  "from transformers import DistilBertForSequenceClassification\n",
367
  "\n",
368
  "# Load the model with a classification head\n",
369
- "model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=6) # 6 classes: 0 to 5\n"
 
370
  ]
371
  },
372
  {
@@ -424,12 +426,12 @@
424
  "from torch.optim.lr_scheduler import StepLR\n",
425
  "\n",
426
  "# Set up the optimizer\n",
427
- "optimizer = AdamW(model.parameters(), lr=0.0001)\n",
428
  "\n",
429
  "# Define the training loop\n",
430
  "epochs = 1\n",
431
  "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
432
- "model.to(device)\n",
433
  "\n",
434
  "print(device)"
435
  ]
@@ -800,7 +802,7 @@
800
  ],
801
  "source": [
802
  "for epoch in range(epochs):\n",
803
- " model.train()\n",
804
  " total_loss = 0\n",
805
  " for batch in train_dataloader:\n",
806
  " input_ids, labels = batch\n",
@@ -810,7 +812,7 @@
810
  " optimizer.zero_grad()\n",
811
  "\n",
812
  " # Forward pass\n",
813
- " outputs = model(input_ids, labels=labels)\n",
814
  " loss = outputs.loss\n",
815
  " total_loss += loss.item()\n",
816
  "\n",
@@ -835,7 +837,7 @@
835
  }
836
  ],
837
  "source": [
838
- "model.eval()\n",
839
  "correct_predictions = 0\n",
840
  "total_predictions = 0\n",
841
  "\n",
@@ -844,7 +846,7 @@
844
  " input_ids, labels = batch\n",
845
  " input_ids, labels = input_ids.to(device), labels.to(device)\n",
846
  " # Forward pass\n",
847
- " outputs = model(input_ids)\n",
848
  " predictions = torch.argmax(outputs.logits, dim=-1)\n",
849
  "\n",
850
  " correct_predictions += (predictions == labels).sum().item()\n",
@@ -872,9 +874,9 @@
872
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
873
  " input_ids = inputs['input_ids'].to(device)\n",
874
  " \n",
875
- " model.eval()\n",
876
  " with torch.no_grad():\n",
877
- " outputs = model(input_ids)\n",
878
  " prediction = torch.argmax(outputs.logits, dim=-1)\n",
879
  " return prediction.item()\n",
880
  "\n",
@@ -915,10 +917,10 @@
915
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
916
  " input_ids = inputs['input_ids'].to(device)\n",
917
  " \n",
918
- " model.eval()\n",
919
  " with torch.no_grad():\n",
920
  " # Get the raw logits from the model\n",
921
- " outputs = model(input_ids)\n",
922
  " logits = outputs.logits\n",
923
  " \n",
924
  " # Apply softmax to get probabilities\n",
@@ -961,7 +963,7 @@
961
  }
962
  ],
963
  "source": [
964
- "model.save_pretrained('./fine_tuned_distilbert')\n",
965
  "\n",
966
  "# Save the tokenizer\n",
967
  "tokenizer.save_pretrained('./fine_tuned_distilbert')"
@@ -976,7 +978,7 @@
976
  "from transformers import DistilBertForSequenceClassification, DistilBertTokenizer\n",
977
  "\n",
978
  "# Load the saved model\n",
979
- "model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')\n",
980
  "\n",
981
  "# Load the saved tokenizer\n",
982
  "tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')\n"
@@ -1007,9 +1009,9 @@
1007
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
1008
  " input_ids = inputs['input_ids'].to(device)\n",
1009
  "\n",
1010
- " model.eval()\n",
1011
  " with torch.no_grad():\n",
1012
- " outputs = model(input_ids)\n",
1013
  " logits = outputs.logits\n",
1014
  " probabilities = softmax(logits, dim=-1)\n",
1015
  " \n",
@@ -1028,130 +1030,6 @@
1028
  " print(f\"{class_label}: {prob:.4f}\")"
1029
  ]
1030
  },
1031
- {
1032
- "cell_type": "code",
1033
- "execution_count": 55,
1034
- "metadata": {},
1035
- "outputs": [],
1036
- "source": [
1037
- "e = ['@ What are the key differences between classification and regression tasks in supervised learning, and how do you determine which algorithm to use for a specific problem?',\n",
1038
- " '@ How does clustering differ from dimensionality reduction, and can you provide real-world examples of where each is applied?',\n",
1039
- " '@ What are common evaluation metrics for classification models, and how do precision, recall, and F1-score relate to each other?',\n",
1040
- " '@ How do convolutional neural networks (CNNs) and recurrent neural networks (RNNs) differ in their architecture and applications?',\n",
1041
- " '@ What steps can be taken to identify and mitigate bias in machine learning models, and why is this an important consideration?']"
1042
- ]
1043
- },
1044
- {
1045
- "cell_type": "code",
1046
- "execution_count": 56,
1047
- "metadata": {},
1048
- "outputs": [
1049
- {
1050
- "name": "stdout",
1051
- "output_type": "stream",
1052
- "text": [
1053
- "{'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277}\n",
1054
- "{'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824}\n",
1055
- "{'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678}\n",
1056
- "{'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526}\n",
1057
- "{'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n"
1058
- ]
1059
- }
1060
- ],
1061
- "source": [
1062
- "for i in e:\n",
1063
- " class_probabilities = predict_with_loaded_model(i)\n",
1064
- " print(class_probabilities)"
1065
- ]
1066
- },
1067
- {
1068
- "cell_type": "code",
1069
- "execution_count": 67,
1070
- "metadata": {},
1071
- "outputs": [],
1072
- "source": [
1073
- "weights = {\n",
1074
- " 'Remembering': 0.5,\n",
1075
- " 'Understanding': 0.5,\n",
1076
- " 'Applying': 0.5,\n",
1077
- " 'Analyzing': 0.5,\n",
1078
- " 'Evaluating': 0.5,\n",
1079
- " 'Creating':0.5,\n",
1080
- "}"
1081
- ]
1082
- },
1083
- {
1084
- "cell_type": "code",
1085
- "execution_count": 68,
1086
- "metadata": {},
1087
- "outputs": [],
1088
- "source": [
1089
- "questions = [\n",
1090
- " {'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277},\n",
1091
- " {'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824},\n",
1092
- " {'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678},\n",
1093
- " {'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526},\n",
1094
- " {'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n",
1095
- "]"
1096
- ]
1097
- },
1098
- {
1099
- "cell_type": "code",
1100
- "execution_count": 69,
1101
- "metadata": {},
1102
- "outputs": [
1103
- {
1104
- "name": "stdout",
1105
- "output_type": "stream",
1106
- "text": [
1107
- "2.49999998975 18.0 90.0\n",
1108
- "Normalized Score of the Paper: 0.0278\n"
1109
- ]
1110
- }
1111
- ],
1112
- "source": [
1113
- "def calculate_score(question, weights):\n",
1114
- " score = sum(question[level] * weight for level, weight in weights.items())\n",
1115
- " return score\n",
1116
- "\n",
1117
- "total_score = sum(calculate_score(q, weights) for q in questions)\n",
1118
- "max_score_per_question = sum([weights[level] for level in weights]) * 6 \n",
1119
- "max_total_score = max_score_per_question * len(questions) \n",
1120
- "normalized_score = (total_score - 0) / (max_total_score - 0)\n",
1121
- "print(total_score, max_score_per_question, max_total_score)\n",
1122
- "print(f\"Normalized Score of the Paper: {normalized_score:.4f}\")"
1123
- ]
1124
- },
1125
- {
1126
- "cell_type": "code",
1127
- "execution_count": null,
1128
- "metadata": {},
1129
- "outputs": [],
1130
- "source": []
1131
- },
1132
- {
1133
- "cell_type": "code",
1134
- "execution_count": 70,
1135
- "metadata": {},
1136
- "outputs": [
1137
- {
1138
- "name": "stdout",
1139
- "output_type": "stream",
1140
- "text": [
1141
- "{'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277}\n",
1142
- "{'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824}\n",
1143
- "{'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678}\n",
1144
- "{'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526}\n",
1145
- "{'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n"
1146
- ]
1147
- }
1148
- ],
1149
- "source": [
1150
- "for i in e:\n",
1151
- " class_probabilities = predict_with_loaded_model(i)\n",
1152
- " print(class_probabilities)"
1153
- ]
1154
- },
1155
  {
1156
  "cell_type": "code",
1157
  "execution_count": null,
 
298
  "source": [
299
  "from transformers import DistilBertTokenizer\n",
300
  "import torch\n",
301
+ "from torch.utils.data import DataLoader\n",
302
+ "import intel_extension_for_pytorch as ipex\n",
303
  "# Load the DistilBERT tokenizer\n",
304
  "tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')\n",
305
  "\n",
 
367
  "from transformers import DistilBertForSequenceClassification\n",
368
  "\n",
369
  "# Load the model with a classification head\n",
370
+ "model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=6) # 6 classes: 0 to 5\n",
371
+ "optimized_model = ipex.optimize(model, dtype=torch.float32)"
372
  ]
373
  },
374
  {
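
This hunk is where the commit brings in Intel Extension for PyTorch: the freshly loaded classifier is wrapped with ipex.optimize. As a minimal sketch of the same pattern outside the notebook JSON (assuming a CPU target and that the model is about to be fine-tuned; for training, ipex.optimize is normally given the optimizer as well and returns both):

import torch
import intel_extension_for_pytorch as ipex
from torch.optim import AdamW
from transformers import DistilBertForSequenceClassification

# Load DistilBERT with a 6-way classification head, as in the notebook
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased', num_labels=6)
model.train()
optimizer = AdamW(model.parameters(), lr=1e-4)
# Training-time IPEX optimization: passing the optimizer lets IPEX rewrite both together
model, optimizer = ipex.optimize(model, optimizer=optimizer, dtype=torch.float32)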
 
426
  "from torch.optim.lr_scheduler import StepLR\n",
427
  "\n",
428
  "# Set up the optimizer\n",
429
+ "optimizer = AdamW(optimized_model.parameters(), lr=0.0001)\n",
430
  "\n",
431
  "# Define the training loop\n",
432
  "epochs = 1\n",
433
  "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
434
+ "optimized_model.to(device)\n",
435
  "\n",
436
  "print(device)"
437
  ]
 
802
  ],
803
  "source": [
804
  "for epoch in range(epochs):\n",
805
+ " optimized_model.train()\n",
806
  " total_loss = 0\n",
807
  " for batch in train_dataloader:\n",
808
  " input_ids, labels = batch\n",
 
812
  " optimizer.zero_grad()\n",
813
  "\n",
814
  " # Forward pass\n",
815
+ " outputs = optimized_model(input_ids, labels=labels)\n",
816
  " loss = outputs.loss\n",
817
  " total_loss += loss.item()\n",
818
  "\n",
 
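For reference, the training step these hunks update, written out as plain Python; a sketch that assumes train_dataloader yields (input_ids, labels) batches and that optimized_model, optimizer and device come from the cells above:

for input_ids, labels in train_dataloader:
    input_ids, labels = input_ids.to(device), labels.to(device)
    optimizer.zero_grad()
    # The sequence-classification head computes the loss when labels are supplied
    outputs = optimized_model(input_ids, labels=labels)
    outputs.loss.backward()
    optimizer.step()
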
837
  }
838
  ],
839
  "source": [
840
+ "optimized_model.eval()\n",
841
  "correct_predictions = 0\n",
842
  "total_predictions = 0\n",
843
  "\n",
 
846
  " input_ids, labels = batch\n",
847
  " input_ids, labels = input_ids.to(device), labels.to(device)\n",
848
  " # Forward pass\n",
849
+ " outputs = optimized_model(input_ids)\n",
850
  " predictions = torch.argmax(outputs.logits, dim=-1)\n",
851
  "\n",
852
  " correct_predictions += (predictions == labels).sum().item()\n",
 
874
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
875
  " input_ids = inputs['input_ids'].to(device)\n",
876
  " \n",
877
+ " optimized_model.eval()\n",
878
  " with torch.no_grad():\n",
879
+ " outputs = optimized_model(input_ids)\n",
880
  " prediction = torch.argmax(outputs.logits, dim=-1)\n",
881
  " return prediction.item()\n",
882
  "\n",
 
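The prediction helper in the hunk above sends only input_ids to the model. A sketch of the same helper that also forwards the attention mask, which matters once padded batches are used; tokenizer, optimized_model and device are the notebook's own names:

import torch

def predict_class(text):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    optimized_model.eval()
    with torch.no_grad():
        # Forwards input_ids and attention_mask together
        outputs = optimized_model(**inputs)
    return torch.argmax(outputs.logits, dim=-1).item()
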
917
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
918
  " input_ids = inputs['input_ids'].to(device)\n",
919
  " \n",
920
+ " optimized_model.eval()\n",
921
  " with torch.no_grad():\n",
922
  " # Get the raw logits from the model\n",
923
+ " outputs = optimized_model(input_ids)\n",
924
  " logits = outputs.logits\n",
925
  " \n",
926
  " # Apply softmax to get probabilities\n",
 
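The removed output cells earlier in this diff print per-question probability dicts over six Bloom's-taxonomy labels. A small sketch of how such a dict can be assembled from the logits; the label order 0..5 is an assumption inferred from those outputs:

from torch.nn.functional import softmax

CLASS_LABELS = ['Remembering', 'Understanding', 'Applying',
                'Analyzing', 'Evaluating', 'Creating']

def probabilities_to_dict(logits):
    # logits has shape (1, 6) for a single question
    probs = softmax(logits, dim=-1).squeeze(0)
    return {label: p.item() for label, p in zip(CLASS_LABELS, probs)}
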
963
  }
964
  ],
965
  "source": [
966
+ "optimized_model.save_pretrained('./fine_tuned_distilbert')\n",
967
  "\n",
968
  "# Save the tokenizer\n",
969
  "tokenizer.save_pretrained('./fine_tuned_distilbert')"
 
978
  "from transformers import DistilBertForSequenceClassification, DistilBertTokenizer\n",
979
  "\n",
980
  "# Load the saved model\n",
981
+ "optimized_model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')\n",
982
  "\n",
983
  "# Load the saved tokenizer\n",
984
  "tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')\n"
 
1009
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
1010
  " input_ids = inputs['input_ids'].to(device)\n",
1011
  "\n",
1012
+ " optimized_model.eval()\n",
1013
  " with torch.no_grad():\n",
1014
+ " outputs = optimized_model(input_ids)\n",
1015
  " logits = outputs.logits\n",
1016
  " probabilities = softmax(logits, dim=-1)\n",
1017
  " \n",
 
1030
  " print(f\"{class_label}: {prob:.4f}\")"
1031
  ]
1032
  },
 
1033
  {
1034
  "cell_type": "code",
1035
  "execution_count": null,
t5_training.ipynb CHANGED
@@ -25,19 +25,22 @@
25
  "from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
26
  "from datasets import Dataset\n",
27
  "from transformers import Trainer, TrainingArguments\n",
28
  "import json\n",
29
  "\n",
30
  "# Load pre-trained FLAN-T5 model and tokenizer\n",
31
  "model_name = \"google/flan-t5-large\" # FLAN-T5 Base Model\n",
32
  "tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
33
  "model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
34
- "\n",
35
  "# Example input-output pair for fine-tuning\n",
36
- "data = json.load('t5train.json')\n",
37
  "\n",
38
  "# Convert the data to a Hugging Face dataset\n",
39
  "dataset = Dataset.from_dict(data)\n",
40
- "\n",
41
  "# Tokenize the data\n",
42
  "def preprocess_function(examples):\n",
43
  " model_inputs = tokenizer(examples['input_text'], padding=\"max_length\", truncation=True, max_length=2048)\n",
@@ -71,7 +74,7 @@
71
  "\n",
72
  "# Initialize the Trainer class\n",
73
  "trainer = Trainer(\n",
74
- " model=model,\n",
75
  " args=training_args,\n",
76
  " train_dataset=tokenized_datasets,\n",
77
  " eval_dataset=tokenized_datasets # Use the same dataset for evaluation since we only have one data point\n",
@@ -82,17 +85,11 @@
82
  "\n",
83
  "# Save the fine-tuned model\n",
84
  "#trainer.save_model(\"./flan_t5_finetuned\")\n",
85
- "model.save_pretrained(\"./flan_t5_finetuned\")\n",
86
  "tokenizer.save_pretrained(\"./flan_t5_finetuned\")\n",
87
  "\n",
88
  "# Evaluate the model on the training data (for a single example)\n",
89
- "model.eval()\n",
90
- "inputs = tokenizer(\"What are the key differences between classification and regression tasks in supervised learning, and how do you determine which algorithm to use for a specific problem? e How does clustering differ from dimensionality reduction, and can you provide real-world examples of where each is applied?\", return_tensors=\"pt\", padding=True)\n",
91
- "outputs = model.generate(inputs['input_ids'], max_length=1024)\n",
92
- "\n",
93
- "# Decode the generated output\n",
94
- "generated_output = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
95
- "print(generated_output)"
96
  ]
97
  },
98
  {
@@ -110,14 +107,14 @@
110
  "\n",
111
  "# Load your FP32 model\n",
112
  "model_path = \"./flan_t5_finetuned\"\n",
113
- "model = T5ForConditionalGeneration.from_pretrained(model_path)\n",
114
  "tokenizer = T5Tokenizer.from_pretrained(model_path)\n",
115
  "\n",
116
  "# Define the quantization configuration\n",
117
  "quant_config = PostTrainingQuantConfig(approach='dynamic') # Dynamic quantization\n",
118
  "\n",
119
  "# Quantize the model\n",
120
- "q_model = fit(model=model, conf=quant_config)\n",
121
  "\n",
122
  "# Save the quantized model\n",
123
  "quantized_model_path = \"./flan_t5_quantized_fp16\"\n",
 
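The quantization cell above relies on Intel Neural Compressor. A minimal sketch of the dynamic post-training quantization flow it appears to follow, assuming Neural Compressor 2.x (the import paths and the q_model.save call are assumptions; note that dynamic quantization yields an int8 model even though the target directory is named fp16):

from transformers import T5ForConditionalGeneration
from neural_compressor import PostTrainingQuantConfig, quantization

# Load the fine-tuned FP32 checkpoint
model = T5ForConditionalGeneration.from_pretrained("./flan_t5_finetuned")

# Dynamic (weight-only) post-training quantization needs no calibration dataloader
quant_config = PostTrainingQuantConfig(approach='dynamic')
q_model = quantization.fit(model=model, conf=quant_config)

# Persist the quantized model (directory name here is illustrative)
q_model.save("./flan_t5_quantized_int8")
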
25
  "from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
26
  "from datasets import Dataset\n",
27
  "from transformers import Trainer, TrainingArguments\n",
28
+ "import torch\n",
29
+ "from torch.utils.data import DataLoader\n",
30
+ "import intel_extension_for_pytorch as ipex\n",
31
  "import json\n",
32
  "\n",
33
  "# Load pre-trained FLAN-T5 model and tokenizer\n",
34
  "model_name = \"google/flan-t5-large\" # FLAN-T5 Base Model\n",
35
  "tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
36
  "model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
37
+ "optimized_model = ipex.optimize(model, dtype=torch.float32)\n",
38
  "# Example input-output pair for fine-tuning\n",
39
+ "data = json.load(\"t5train.json\")\n",
40
  "\n",
41
  "# Convert the data to a Hugging Face dataset\n",
42
  "dataset = Dataset.from_dict(data)\n",
43
+ "dataloader = DataLoader(dataset, num_workers=4, pin_memory=True)\n",
44
  "# Tokenize the data\n",
45
  "def preprocess_function(examples):\n",
46
  " model_inputs = tokenizer(examples['input_text'], padding=\"max_length\", truncation=True, max_length=2048)\n",
 
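One snag in the cell above: json.load expects a file object, not a path string, so data = json.load("t5train.json") raises at run time. A sketch of the loading step, assuming t5train.json holds lists keyed by the fields the preprocessing code reads (e.g. 'input_text'):

import json
from datasets import Dataset

# json.load takes an open file handle; passing a bare path raises AttributeError
with open("t5train.json") as f:
    data = json.load(f)

dataset = Dataset.from_dict(data)
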
74
  "\n",
75
  "# Initialize the Trainer class\n",
76
  "trainer = Trainer(\n",
77
+ " model=optimized_model,\n",
78
  " args=training_args,\n",
79
  " train_dataset=tokenized_datasets,\n",
80
  " eval_dataset=tokenized_datasets # Use the same dataset for evaluation since we only have one data point\n",
 
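The Trainer hunk above shows only the call site. For completeness, a sketch of a matching TrainingArguments setup; every value below is illustrative rather than taken from the notebook:

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir='./flan_t5_finetuned',   # illustrative
    num_train_epochs=1,                 # illustrative
    per_device_train_batch_size=1,      # illustrative
    logging_steps=1,
)
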
85
  "\n",
86
  "# Save the fine-tuned model\n",
87
  "#trainer.save_model(\"./flan_t5_finetuned\")\n",
88
+ "optimized_model.save_pretrained(\"./flan_t5_finetuned\")\n",
89
  "tokenizer.save_pretrained(\"./flan_t5_finetuned\")\n",
90
  "\n",
91
  "# Evaluate the model on the training data (for a single example)\n",
92
+ "optimized_model.eval()"
93
  ]
94
  },
95
  {
 
107
  "\n",
108
  "# Load your FP32 model\n",
109
  "model_path = \"./flan_t5_finetuned\"\n",
110
+ "optimized_model = T5ForConditionalGeneration.from_pretrained(model_path)\n",
111
  "tokenizer = T5Tokenizer.from_pretrained(model_path)\n",
112
  "\n",
113
  "# Define the quantization configuration\n",
114
  "quant_config = PostTrainingQuantConfig(approach='dynamic') # Dynamic quantization\n",
115
  "\n",
116
  "# Quantize the model\n",
117
+ "q_model = fit(model=optimized_model, conf=quant_config)\n",
118
  "\n",
119
  "# Save the quantized model\n",
120
  "quantized_model_path = \"./flan_t5_quantized_fp16\"\n",