Kevin Fink
committed on
Commit
·
e585d7a
1
Parent(s):
a1b0975
init
Browse files
app.py
CHANGED
@@ -40,24 +40,23 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
|
|
40 |
|
41 |
# Tokenize the dataset
|
42 |
def tokenize_function(examples):
|
43 |
-
max_length =
|
44 |
# Assuming 'text' is the input and 'target' is the expected output
|
45 |
model_inputs = tokenizer(
|
46 |
examples['text'],
|
47 |
-
max_length=
|
48 |
padding=False, # Disable padding here, we will handle it later
|
49 |
truncation=True,
|
50 |
-
return_tensors="pt" # Return PyTorch tensors
|
51 |
)
|
52 |
|
53 |
# Tokenize the target texts to produce the labels
|
54 |
with tokenizer.as_target_tokenizer():
|
55 |
labels = tokenizer(
|
56 |
examples['target'],
|
57 |
-
max_length=
|
58 |
padding=False, # Disable padding here, we will handle it later
|
59 |
truncation=True,
|
60 |
-
|
61 |
)
|
62 |
|
63 |
# Add labels to the model inputs
|
|
|
40 |
|
41 |
# Tokenize the dataset
|
42 |
def tokenize_function(examples):
|
43 |
+
max_length = 128
|
44 |
# Assuming 'text' is the input and 'target' is the expected output
|
45 |
model_inputs = tokenizer(
|
46 |
examples['text'],
|
47 |
+
max_length=max_length, # Set to None for dynamic padding
|
48 |
padding=False, # Disable padding here, we will handle it later
|
49 |
truncation=True,
|
|
|
50 |
)
|
51 |
|
52 |
# Tokenize the target texts to produce the labels
|
53 |
with tokenizer.as_target_tokenizer():
|
54 |
labels = tokenizer(
|
55 |
examples['target'],
|
56 |
+
max_length=max_length, # Set to None for dynamic padding
|
57 |
padding=False, # Disable padding here, we will handle it later
|
58 |
truncation=True,
|
59 |
+
|
60 |
)
|
61 |
|
62 |
# Add labels to the model inputs
|