Upload 2 files
- distilbert_finetuing.ipynb +18 -140
- t5_training.ipynb +11 -14
distilbert_finetuing.ipynb
CHANGED
@@ -298,7 +298,8 @@
  "source": [
  "from transformers import DistilBertTokenizer\n",
  "import torch\n",
- "\n",
+ "from torch.utils.data import DataLoader\n",
+ "import intel_extension_for_pytorch as ipex\n",
  "# Load the DistilBERT tokenizer\n",
  "tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')\n",
  "\n",
@@ -366,7 +367,8 @@
  "from transformers import DistilBertForSequenceClassification\n",
  "\n",
  "# Load the model with a classification head\n",
- "model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=6) # 6 classes: 0 to 5\n"
+ "model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=6) # 6 classes: 0 to 5\n",
+ "optimized_model = ipex.optimize(model, dtype=torch.float32)"
  ]
 },
 {
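Note: the two added lines above are the heart of this commit: the model is passed through Intel Extension for PyTorch (IPEX) before training. A minimal sketch of the pattern, assuming CPU execution; for training, ipex.optimize also accepts the optimizer so both are prepared together (the notebook optimizes the model alone):

import torch
import intel_extension_for_pytorch as ipex
from torch.optim import AdamW
from transformers import DistilBertForSequenceClassification

# Same classification head as the notebook: 6 labels.
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased', num_labels=6)
optimizer = AdamW(model.parameters(), lr=1e-4)

# ipex.optimize should see the model in training mode when an optimizer is
# given; it returns the pair prepared for CPU execution.
model.train()
model, optimizer = ipex.optimize(model, dtype=torch.float32, optimizer=optimizer)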
@@ -424,12 +426,12 @@
  "from torch.optim.lr_scheduler import StepLR\n",
  "\n",
  "# Set up the optimizer\n",
- "optimizer = AdamW(
+ "optimizer = AdamW(optimized_model.parameters(), lr=0.0001)\n",
  "\n",
  "# Define the training loop\n",
  "epochs = 1\n",
  "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
- "
+ "optimized_model.to(device)\n",
  "\n",
  "print(device)"
  ]
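Note: the removed optimizer line is truncated in this view; the replacement builds AdamW over optimized_model.parameters() with lr=0.0001. One caveat worth flagging: on a CUDA machine the added optimized_model.to(device) would move the IPEX-prepared model to the GPU, which defeats the CPU-side optimization; in this Space the device presumably resolves to cpu. A sketch of the setup, assuming AdamW comes from torch.optim (transformers' AdamW is deprecated) and an illustrative StepLR schedule, since the hunk only shows the import:

from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR

optimizer = AdamW(optimized_model.parameters(), lr=1e-4)
scheduler = StepLR(optimizer, step_size=1, gamma=0.9)  # illustrative values; not in the diff

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
optimized_model.to(device)  # a no-op on CPU-only hosts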
@@ -800,7 +802,7 @@
  ],
  "source": [
  "for epoch in range(epochs):\n",
- "
+ " optimized_model.train()\n",
  " total_loss = 0\n",
  " for batch in train_dataloader:\n",
  " input_ids, labels = batch\n",
@@ -810,7 +812,7 @@
  " optimizer.zero_grad()\n",
  "\n",
  " # Forward pass\n",
- " outputs =
+ " outputs = optimized_model(input_ids, labels=labels)\n",
  " loss = outputs.loss\n",
  " total_loss += loss.item()\n",
  "\n",
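Note: the restored forward pass feeds only input_ids and labels, so no attention_mask reaches the model and padding tokens are attended to. A minimal sketch of one epoch consistent with these hunks, assuming train_dataloader yields (input_ids, labels) tensor pairs:

for epoch in range(epochs):
    optimized_model.train()
    total_loss = 0
    for input_ids, labels in train_dataloader:
        input_ids, labels = input_ids.to(device), labels.to(device)
        optimizer.zero_grad()
        # Hugging Face models compute cross-entropy internally when labels are passed.
        outputs = optimized_model(input_ids, labels=labels)
        outputs.loss.backward()
        optimizer.step()
        total_loss += outputs.loss.item()
    print(f"epoch {epoch}: mean loss {total_loss / len(train_dataloader):.4f}")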
@@ -835,7 +837,7 @@
  }
  ],
  "source": [
- "
+ "optimized_model.eval()\n",
  "correct_predictions = 0\n",
  "total_predictions = 0\n",
  "\n",
@@ -844,7 +846,7 @@
  " input_ids, labels = batch\n",
  " input_ids, labels = input_ids.to(device), labels.to(device)\n",
  " # Forward pass\n",
- " outputs =
+ " outputs = optimized_model(input_ids)\n",
  " predictions = torch.argmax(outputs.logits, dim=-1)\n",
  "\n",
  " correct_predictions += (predictions == labels).sum().item()\n",
@@ -872,9 +874,9 @@
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
  " input_ids = inputs['input_ids'].to(device)\n",
  " \n",
- "
+ " optimized_model.eval()\n",
  " with torch.no_grad():\n",
- " outputs =
+ " outputs = optimized_model(input_ids)\n",
  " prediction = torch.argmax(outputs.logits, dim=-1)\n",
  " return prediction.item()\n",
  "\n",
@@ -915,10 +917,10 @@
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
  " input_ids = inputs['input_ids'].to(device)\n",
  " \n",
- "
+ " optimized_model.eval()\n",
  " with torch.no_grad():\n",
  " # Get the raw logits from the model\n",
- " outputs =
+ " outputs = optimized_model(input_ids)\n",
  " logits = outputs.logits\n",
  " \n",
  " # Apply softmax to get probabilities\n",
@@ -961,7 +963,7 @@
  }
  ],
  "source": [
- "
+ "optimized_model.save_pretrained('./fine_tuned_distilbert')\n",
  "\n",
  "# Save the tokenizer\n",
  "tokenizer.save_pretrained('./fine_tuned_distilbert')"
@@ -976,7 +978,7 @@
  "from transformers import DistilBertForSequenceClassification, DistilBertTokenizer\n",
  "\n",
  "# Load the saved model\n",
- "
+ "optimized_model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')\n",
  "\n",
  "# Load the saved tokenizer\n",
  "tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')\n"
@@ -1007,9 +1009,9 @@
  " inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)\n",
  " input_ids = inputs['input_ids'].to(device)\n",
  "\n",
- "
+ " optimized_model.eval()\n",
  " with torch.no_grad():\n",
- " outputs =
+ " outputs = optimized_model(input_ids)\n",
  " logits = outputs.logits\n",
  " probabilities = softmax(logits, dim=-1)\n",
  " \n",
@@ -1028,130 +1030,6 @@
  " print(f\"{class_label}: {prob:.4f}\")"
  ]
 },
- {
- "cell_type": "code",
- "execution_count": 55,
- "metadata": {},
- "outputs": [],
- "source": [
- "e = ['@ What are the key differences between classification and regression tasks in supervised learning, and how do you determine which algorithm to use for a specific problem?',\n",
- " '@ How does clustering differ from dimensionality reduction, and can you provide real-world examples of where each is applied?',\n",
- " '@ What are common evaluation metrics for classification models, and how do precision, recall, and F1-score relate to each other?',\n",
- " '@ How do convolutional neural networks (CNNs) and recurrent neural networks (RNNs) differ in their architecture and applications?',\n",
- " '@ What steps can be taken to identify and mitigate bias in machine learning models, and why is this an important consideration?']"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 56,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277}\n",
- "{'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824}\n",
- "{'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678}\n",
- "{'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526}\n",
- "{'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n"
- ]
- }
- ],
- "source": [
- "for i in e:\n",
- " class_probabilities = predict_with_loaded_model(i)\n",
- " print(class_probabilities)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 67,
- "metadata": {},
- "outputs": [],
- "source": [
- "weights = {\n",
- " 'Remembering': 0.5,\n",
- " 'Understanding': 0.5,\n",
- " 'Applying': 0.5,\n",
- " 'Analyzing': 0.5,\n",
- " 'Evaluating': 0.5,\n",
- " 'Creating':0.5,\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 68,
- "metadata": {},
- "outputs": [],
- "source": [
- "questions = [\n",
- " {'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277},\n",
- " {'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824},\n",
- " {'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678},\n",
- " {'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526},\n",
- " {'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 69,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2.49999998975 18.0 90.0\n",
- "Normalized Score of the Paper: 0.0278\n"
- ]
- }
- ],
- "source": [
- "def calculate_score(question, weights):\n",
- " score = sum(question[level] * weight for level, weight in weights.items())\n",
- " return score\n",
- "\n",
- "total_score = sum(calculate_score(q, weights) for q in questions)\n",
- "max_score_per_question = sum([weights[level] for level in weights]) * 6 \n",
- "max_total_score = max_score_per_question * len(questions) \n",
- "normalized_score = (total_score - 0) / (max_total_score - 0)\n",
- "print(total_score, max_score_per_question, max_total_score)\n",
- "print(f\"Normalized Score of the Paper: {normalized_score:.4f}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 70,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'Remembering': 0.10612957, 'Understanding': 0.019418646, 'Applying': 0.06178399, 'Analyzing': 0.06437193, 'Evaluating': 0.02016813, 'Creating': 0.7281277}\n",
- "{'Remembering': 0.0023775953, 'Understanding': 0.007248114, 'Applying': 0.030584276, 'Analyzing': 0.03784482, 'Evaluating': 0.011662786, 'Creating': 0.9102824}\n",
- "{'Remembering': 0.77779603, 'Understanding': 0.00137261, 'Applying': 0.030797651, 'Analyzing': 0.01779477, 'Evaluating': 0.015782129, 'Creating': 0.15645678}\n",
- "{'Remembering': 0.0041304147, 'Understanding': 0.0012872498, 'Applying': 0.0071271434, 'Analyzing': 0.08727108, 'Evaluating': 0.012631507, 'Creating': 0.8875526}\n",
- "{'Remembering': 0.02713421, 'Understanding': 0.0032449323, 'Applying': 0.0559042, 'Analyzing': 0.021534933, 'Evaluating': 0.015711982, 'Creating': 0.8764698}\n"
- ]
- }
- ],
- "source": [
- "for i in e:\n",
- " class_probabilities = predict_with_loaded_model(i)\n",
- " print(class_probabilities)"
- ]
- },
 {
 "cell_type": "code",
 "execution_count": null,
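Note: the deleted cells scored five generated questions against uniform Bloom's-taxonomy weights, and the printed numbers check out arithmetically. Each question's six probabilities sum to 1 and every weight is 0.5, so calculate_score returns 0.5 per question:

total_score = 5 x 0.5 = 2.5 (the printed 2.49999998975, up to float32 rounding)
max_total_score = (0.5 x 6) x 6 x 5 = 90, giving 2.5 / 90 = 0.0278

Since probabilities sum to 1, a question can never score more than 0.5 under these weights, so the attainable maximum for five questions is 2.5, not 90; the extra factor of 6 in max_score_per_question double-counts the number of classes, and dividing by 2.5 would map this set to a normalized score of 1.0.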
t5_training.ipynb
CHANGED
@@ -25,19 +25,22 @@
  "from transformers import T5ForConditionalGeneration, T5Tokenizer\n",
  "from datasets import Dataset\n",
  "from transformers import Trainer, TrainingArguments\n",
+ "import torch\n",
+ "from torch.utils.data import DataLoader\n",
+ "import intel_extension_for_pytorch as ipex\n",
  "import json\n",
  "\n",
  "# Load pre-trained FLAN-T5 model and tokenizer\n",
  "model_name = \"google/flan-t5-large\" # FLAN-T5 Base Model\n",
  "tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
  "model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
- "\n",
+ "optimized_model = ipex.optimize(model, dtype=torch.float32)\n",
  "# Example input-output pair for fine-tuning\n",
- "data = json.load(
+ "data = json.load(\"t5train.json\")\n",
  "\n",
  "# Convert the data to a Hugging Face dataset\n",
  "dataset = Dataset.from_dict(data)\n",
- "\n",
+ "dataloader = DataLoader(dataset, num_workers=4, pin_memory=True)\n",
  "# Tokenize the data\n",
  "def preprocess_function(examples):\n",
  " model_inputs = tokenizer(examples['input_text'], padding=\"max_length\", truncation=True, max_length=2048)\n",
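Note: the added data-loading line passes a filename string to json.load, which expects a file object, so this cell raises a TypeError as written; the hand-built DataLoader is also never consumed, since Trainer constructs its own loaders. A corrected sketch, assuming t5train.json holds a column-oriented dict (schema assumed):

import json
from datasets import Dataset

# json.load needs an open file handle, not a path string.
with open("t5train.json") as f:
    data = json.load(f)  # e.g. {"input_text": [...], "target_text": [...]}

dataset = Dataset.from_dict(data)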
@@ -71,7 +74,7 @@
  "\n",
  "# Initialize the Trainer class\n",
  "trainer = Trainer(\n",
- " model=
+ " model=optimized_model,\n",
  " args=training_args,\n",
  " train_dataset=tokenized_datasets,\n",
  " eval_dataset=tokenized_datasets # Use the same dataset for evaluation since we only have one data point\n",
@@ -82,17 +85,11 @@
  "\n",
  "# Save the fine-tuned model\n",
  "#trainer.save_model(\"./flan_t5_finetuned\")\n",
- "
+ "optimized_model.save_pretrained(\"./flan_t5_finetuned\")\n",
  "tokenizer.save_pretrained(\"./flan_t5_finetuned\")\n",
  "\n",
  "# Evaluate the model on the training data (for a single example)\n",
- "
- "inputs = tokenizer(\"What are the key differences between classification and regression tasks in supervised learning, and how do you determine which algorithm to use for a specific problem? e How does clustering differ from dimensionality reduction, and can you provide real-world examples of where each is applied?\", return_tensors=\"pt\", padding=True)\n",
- "outputs = model.generate(inputs['input_ids'], max_length=1024)\n",
- "\n",
- "# Decode the generated output\n",
- "generated_output = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
- "print(generated_output)"
+ "optimized_model.eval()"
  ]
 },
 {
@@ -110,14 +107,14 @@
  "\n",
  "# Load your FP32 model\n",
  "model_path = \"./flan_t5_finetuned\"\n",
- "
+ "optimized_model = T5ForConditionalGeneration.from_pretrained(model_path)\n",
  "tokenizer = T5Tokenizer.from_pretrained(model_path)\n",
  "\n",
  "# Define the quantization configuration\n",
  "quant_config = PostTrainingQuantConfig(approach='dynamic') # Dynamic quantization\n",
  "\n",
  "# Quantize the model\n",
- "q_model = fit(model=
+ "q_model = fit(model=optimized_model, conf=quant_config)\n",
  "\n",
  "# Save the quantized model\n",
  "quantized_model_path = \"./flan_t5_quantized_fp16\"\n",
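Note: the final hunk quantizes the fine-tuned model with Intel Neural Compressor's post-training dynamic quantization. A minimal end-to-end sketch, assuming the INC 2.x API (PostTrainingQuantConfig under neural_compressor.config, fit under neural_compressor.quantization); dynamic quantization produces INT8 weights, so the flan_t5_quantized_fp16 directory name is misleading:

from transformers import T5ForConditionalGeneration
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.quantization import fit

model = T5ForConditionalGeneration.from_pretrained("./flan_t5_finetuned")

# Dynamic quantization needs no calibration dataset; weights are quantized
# ahead of time and activations on the fly.
quant_config = PostTrainingQuantConfig(approach="dynamic")
q_model = fit(model=model, conf=quant_config)

q_model.save("./flan_t5_quantized_int8")  # assumed INC save method; writes weights + config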