Update app.py
app.py
CHANGED
@@ -11,10 +11,14 @@ model_name = "microsoft/DialoGPT-medium"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 
+# Assign Padding Token
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token  # Use EOS token as padding token
+
 # Fine-tuning Function
 def preprocess_data(example):
-    inputs = tokenizer(example['question'], truncation=True, padding=
-    outputs = tokenizer(example['answer'], truncation=True, padding=
+    inputs = tokenizer(example['question'], truncation=True, padding="max_length", max_length=128)
+    outputs = tokenizer(example['answer'], truncation=True, padding="max_length", max_length=128)
     inputs['labels'] = outputs['input_ids']
     return inputs
 
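For context, a minimal self-contained sketch of how the patched preprocessing could be exercised. The sample question/answer pair and the use of datasets.Dataset.from_dict / map are assumptions for illustration only and are not part of this commit:

# Sketch only: assumed toy data, not taken from the Space's actual app.py.
from datasets import Dataset
from transformers import AutoTokenizer

model_name = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Same fix as in the commit: the DialoGPT tokenizer ships without a pad token,
# so the EOS token is reused for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def preprocess_data(example):
    inputs = tokenizer(example['question'], truncation=True, padding="max_length", max_length=128)
    outputs = tokenizer(example['answer'], truncation=True, padding="max_length", max_length=128)
    inputs['labels'] = outputs['input_ids']
    return inputs

# Placeholder question/answer pair, purely for illustration.
raw = Dataset.from_dict({
    "question": ["What model does this Space fine-tune?"],
    "answer": ["DialoGPT-medium."],
})

# Each example is tokenized to a fixed length of 128; the answer's token IDs
# become the labels, matching the function added in this commit.
tokenized = raw.map(preprocess_data, remove_columns=["question", "answer"])
print(tokenized[0].keys())  # dict_keys(['input_ids', 'attention_mask', 'labels'])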