frankmorales2020
committed on
Commit
•
053207b
1
Parent(s):
4b47e25
Update README.md
Browse files
README.md
CHANGED
@@ -51,7 +51,8 @@ The following hyperparameters were used during training:
|
|
51 |
|
52 |
from transformers import TrainingArguments
|
53 |
args = TrainingArguments(
|
54 |
-
output_dir="Mistral-7B-text-to-sql-flash-attention-2-dataeval",
|
|
|
55 |
num_train_epochs=3, # number of training epochs
|
56 |
per_device_train_batch_size=3, # batch size per device during training
|
57 |
gradient_accumulation_steps=8, #2 # number of steps before performing a backward/update pass
|
|
|
51 |
|
52 |
from transformers import TrainingArguments
|
53 |
args = TrainingArguments(
|
54 |
+
output_dir="Mistral-7B-text-to-sql-flash-attention-2-dataeval",
|
55 |
+
|
56 |
num_train_epochs=3, # number of training epochs
|
57 |
per_device_train_batch_size=3, # batch size per device during training
|
58 |
gradient_accumulation_steps=8, #2 # number of steps before performing a backward/update pass
|