# Spaces:
# Configuration error
# Configuration error
from pathlib import Path

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments
# Define the path to your questions file
questions_file = 'C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained\\New folder (3)\\questions.txt'

# Load your data from the questions file: one question per line, with the
# line terminators stripped by splitlines().
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
# NOTE(review): assumes questions.txt is UTF-8 encoded -- confirm.
questions = Path(questions_file).read_text(encoding='utf-8').splitlines()
# Define your custom tokenizer
def custom_tokenizer(text: str) -> list:
    """Split *text* into whitespace-delimited tokens.

    Uses ``str.split()`` with no separator, so consecutive whitespace
    characters act as a single delimiter and leading/trailing whitespace is
    ignored. An empty or all-whitespace string yields an empty list.

    This is a placeholder implementation; replace the body with the real
    tokenization logic when one is available.

    Args:
        text: The string to tokenize.

    Returns:
        The list of token strings, in their original order.
    """
    return text.split()
# Tokenize your questions (one list of token strings per question)
tokenized_questions = [custom_tokenizer(question) for question in questions]

# Load your custom model
# NOTE(review): from_pretrained() is documented to take a Hub model id or a
# directory containing the config and weights; a bare ``.pt`` file will
# likely fail to load. The path is left unchanged -- confirm where the
# checkpoint really lives and in which format it was saved.
model = AutoModelForSeq2SeqLM.from_pretrained('C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained model.pt')

# Define the training arguments
# No evaluation strategy is set on purpose: the Trainer below receives no
# eval_dataset, and requesting per-epoch evaluation without one raises an
# error. Re-enable it together with an eval_dataset.
training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
)

# Define the trainer and train the model
# NOTE(review): tokenized_questions is a list of lists of token *strings*.
# Trainer presumably needs examples already encoded as model inputs (e.g.
# dicts with input_ids and labels), and a seq2seq model also needs target
# sequences, which this script never loads -- confirm and build a proper
# dataset before relying on this call.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_questions,
)
trainer.train()

# Save the trained model
model_path = './trained_model'
model.save_pretrained(model_path)