frankmorales2020 committed
Commit: c6a9002
Parent(s): 81230bf
Update README.md

README.md CHANGED
@@ -51,8 +51,9 @@ The following hyperparameters were used during training:
 from transformers import TrainingArguments
 
 args = TrainingArguments(
-    output_dir="Mistral-7B-text-to-sql-flash-attention-2-dataeval",
+    output_dir="Mistral-7B-text-to-sql-flash-attention-2-dataeval",
 
+
     num_train_epochs=3,              # number of training epochs
     per_device_train_batch_size=3,   # batch size per device during training
     gradient_accumulation_steps=8, #2  # number of steps before performing a backward/update pass
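For context, the hunk above corresponds to a `TrainingArguments` block along the lines of the minimal sketch below. Only `output_dir`, `num_train_epochs`, `per_device_train_batch_size`, and `gradient_accumulation_steps` appear in the visible diff; every other keyword here (`learning_rate`, `bf16`, `logging_steps`, `save_strategy`) is an illustrative assumption and not taken from the commit.

```python
# Minimal sketch of the training configuration shown in the diff above.
# Arguments beyond the four that appear in the hunk are assumptions for illustration only.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="Mistral-7B-text-to-sql-flash-attention-2-dataeval",  # directory for checkpoints / repo id

    num_train_epochs=3,             # number of training epochs
    per_device_train_batch_size=3,  # batch size per device during training
    gradient_accumulation_steps=8,  # number of steps before performing a backward/update pass
    learning_rate=2e-4,             # assumed value; not part of the visible hunk
    bf16=True,                      # assumed mixed-precision setting; not part of the visible hunk
    logging_steps=10,               # assumed logging frequency; not part of the visible hunk
    save_strategy="epoch",          # assumed checkpointing strategy; not part of the visible hunk
)
```

In a typical fine-tuning script these arguments would then be passed to a `Trainer` (or TRL's `SFTTrainer`) together with the model and dataset; that wiring is outside the scope of this diff.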