# File 1: Model Repo Code (train.py)
# This file contains steps 1 to 4.

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, TrainingArguments, Trainer

# Step 1: Load the Dataset
dataset = load_dataset("squad")

# Step 2: Preprocess the Dataset
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def preprocess_function(examples):
    # Tokenize question/context pairs. Long contexts are split into overlapping
    # windows (stride=128); truncation="only_second" truncates only the context,
    # never the question.
    inputs = tokenizer(
        examples["question"],
        examples["context"],
        max_length=384,
        truncation="only_second",
        stride=128,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )
    offset_mapping = inputs.pop("offset_mapping")
    sample_map = inputs.pop("overflow_to_sample_mapping")
    answers = examples["answers"]
    start_positions, end_positions = [], []

    # The model is trained to predict start/end token indices, so map each
    # answer's character span onto token positions within its window.
    for i, offsets in enumerate(offset_mapping):
        answer = answers[sample_map[i]]
        start_char = answer["answer_start"][0]
        end_char = start_char + len(answer["text"][0])

        # Locate the context portion of this window (sequence id 1).
        sequence_ids = inputs.sequence_ids(i)
        context_start = sequence_ids.index(1)
        context_end = len(sequence_ids) - 1 - sequence_ids[::-1].index(1)

        # If the answer is not fully inside this window, label it (0, 0).
        if offsets[context_start][0] > start_char or offsets[context_end][1] < end_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            idx = context_start
            while idx <= context_end and offsets[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)
            idx = context_end
            while idx >= context_start and offsets[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

# remove_columns is required here: return_overflowing_tokens=True can yield more
# rows than the input batch, so the original columns must be dropped.
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

# Step 3: Train the Model
model = AutoModelForQuestionAnswering.from_pretrained("bert-base-uncased")

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=3e-5,
    per_device_train_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,  # Push checkpoints to the Hugging Face Hub during training
    hub_model_id="username/qa_model_repo",  # Replace with your username and repo name
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
)

trainer.train()

# Step 4: Push the Model and Tokenizer to the Hugging Face Hub
model.push_to_hub("username/qa_model_repo")
tokenizer.push_to_hub("username/qa_model_repo")

print("Model and tokenizer pushed to the Hugging Face Hub successfully!")
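
# Optional sanity check (not one of steps 1 to 4): load the pushed model back
# from the Hub and run a single inference through the question-answering
# pipeline. This is a minimal sketch, assuming the push above succeeded and
# that "username/qa_model_repo" has been replaced with your actual Hub repo id.
from transformers import pipeline

qa_pipeline = pipeline("question-answering", model="username/qa_model_repo")
prediction = qa_pipeline(
    question="What does the model extract?",
    context="The fine-tuned model extracts an answer span from the given context.",
)
print(prediction)  # e.g. {'score': ..., 'start': ..., 'end': ..., 'answer': ...}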