"""Fine-tune a seq2seq model on a plain-text file of questions.

The script reads one question per line, normalizes each with
``custom_tokenizer``, encodes it with the model's own tokenizer, trains for
one epoch with the Hugging Face ``Trainer`` and saves the result.
"""

import torch
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Trainer,
    TrainingArguments,
)

# Define the path to your questions file
questions_file = 'C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained\\New folder (3)\\questions.txt'

# Load your data from the questions file.
# An explicit encoding keeps the result independent of the Windows locale;
# blank lines are skipped because they would become empty training examples.
with open(questions_file, 'r', encoding='utf-8') as f:
    questions = [line for line in f.read().splitlines() if line.strip()]

if not questions:
    raise ValueError(f'No questions found in {questions_file!r}')


# Define your custom tokenizer
def custom_tokenizer(text):
    """Split *text* on runs of whitespace and return the list of pieces."""
    return text.split()


# Tokenize your questions
tokenized_questions = [custom_tokenizer(question) for question in questions]

# Load your custom model
# NOTE(review): from_pretrained() expects a model directory (config + weights)
# or a hub id; this path ends in ".pt" -- confirm it is actually a directory
# written by save_pretrained().
pretrained_path = 'C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained model.pt'
model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_path)
# NOTE(review): assumes the tokenizer files live next to the model -- confirm.
tokenizer = AutoTokenizer.from_pretrained(pretrained_path)


class QuestionDataset(torch.utils.data.Dataset):
    """Map-style dataset of encoded questions in the form Trainer expects.

    Each item is a dict with ``input_ids``, ``attention_mask`` and ``labels``
    (unpadded Python lists; padding is left to the data collator).
    """

    def __init__(self, token_lists, tokenizer):
        self.examples = []
        for words in token_lists:
            # Re-join the whitespace tokens so any tokenizer (including
            # byte-level BPE ones) can encode them without special options.
            encoded = tokenizer(' '.join(words), truncation=True)
            self.examples.append({
                'input_ids': encoded['input_ids'],
                'attention_mask': encoded['attention_mask'],
                # NOTE(review): questions.txt holds inputs only, so the labels
                # mirror the inputs as a placeholder that lets Trainer compute
                # a loss. Replace with real target sequences when available.
                'labels': list(encoded['input_ids']),
            })

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, index):
        return self.examples[index]


train_dataset = QuestionDataset(tokenized_questions, tokenizer)

# Define the training arguments.
# No evaluation strategy is set: there is no eval dataset, and asking Trainer
# to evaluate every epoch without one is an error.
training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
)

# Define the trainer and train the model.
# The seq2seq collator pads inputs and labels per batch (labels with -100 so
# padding is ignored by the loss).
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)
trainer.train()

# Save the trained model (and its tokenizer, so the directory loads on its own)
model_path = './trained_model'
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)