frankmorales2020 committed
Commit 4b47e25 · Parent(s): 0f028c0
Update README.md
README.md CHANGED
@@ -50,11 +50,8 @@ The following hyperparameters were used during training:
 - num_epochs: 3
 
 from transformers import TrainingArguments
-
 args = TrainingArguments(
-output_dir="Mistral-7B-text-to-sql-flash-attention-2-dataeval",
-
-
+    output_dir="Mistral-7B-text-to-sql-flash-attention-2-dataeval",
     num_train_epochs=3, # number of training epochs
     per_device_train_batch_size=3, # batch size per device during training
     gradient_accumulation_steps=8, #2 # number of steps before performing a backward/update pass
@@ -74,7 +71,6 @@ args = TrainingArguments(
     hub_token=access_token_write, # Add this line
     load_best_model_at_end=True,
     logging_dir="/content/gdrive/MyDrive/model/Mistral-7B-text-to-sql-flash-attention-2-dataeval/logs",
-
     evaluation_strategy="steps",
     eval_steps=10,
     save_strategy="steps",
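For reference, the arguments visible in the two hunks assemble into the configuration below. This is a minimal sketch, not the repo's exact training script: the `access_token_write` value and the `save_steps` argument are assumptions added so the snippet runs standalone, and any arguments falling outside the diff hunks are omitted.

```python
from transformers import TrainingArguments

# Placeholder (assumption): a Hugging Face write token; the real script defines this elsewhere.
access_token_write = "hf_..."

args = TrainingArguments(
    output_dir="Mistral-7B-text-to-sql-flash-attention-2-dataeval",
    num_train_epochs=3,             # number of training epochs
    per_device_train_batch_size=3,  # batch size per device during training
    gradient_accumulation_steps=8,  # steps accumulated before each backward/update pass
    logging_dir="/content/gdrive/MyDrive/model/Mistral-7B-text-to-sql-flash-attention-2-dataeval/logs",
    hub_token=access_token_write,   # used when pushing checkpoints to the Hub
    load_best_model_at_end=True,    # requires matching eval/save strategies
    evaluation_strategy="steps",    # renamed to `eval_strategy` in newer transformers releases
    eval_steps=10,                  # evaluate every 10 optimizer steps
    save_strategy="steps",
    save_steps=10,                  # assumption: save on the eval cadence so best-model tracking works
)
```

With `per_device_train_batch_size=3` and `gradient_accumulation_steps=8`, the effective batch size is 24 examples per optimizer step on each device.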