Alaaeldin committed on
Commit
61794f8
·
verified ·
1 Parent(s): 3bf9eda

Rename testcode.py to train.py

Browse files
Files changed (2) hide show
  1. testcode.py +0 -3
  2. train.py +53 -0
testcode.py DELETED
@@ -1,3 +0,0 @@
1
- print("Hello Word")
2
- print("test")
3
- print("Hello Word")
 
 
 
 
train.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File 1: Model Repo Code (train.py)
2
+ # This file contains steps 1 to 4
3
+
4
+ from datasets import load_dataset
5
+ from transformers import AutoTokenizer, AutoModelForQuestionAnswering, TrainingArguments, Trainer
6
+
7
# Step 1: Load the Dataset
# SQuAD is requested by name; its splits are looked up by key later in the script.
dataset = load_dataset(path="squad")

# Step 2: Preprocess the Dataset
# Tokenizer for the "bert-base-uncased" checkpoint. It is kept at module level
# because preprocess_function reads it as a global.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="bert-base-uncased",
)
12
+
13
def locate_answer_span(offsets, sequence_ids, start_char, end_char):
    """Translate a character-level answer span into token indices for one feature.

    Args:
        offsets: Per-token ``(char_start, char_end)`` pairs for the feature.
        sequence_ids: Per-token sequence id; ``1`` marks context tokens, any
            other value (``0`` for the question, ``None`` for special/padding
            tokens) is ignored.
        start_char: Index of the first answer character in the context.
        end_char: Index one past the last answer character in the context.

    Returns:
        ``(start_token, end_token)``, both inclusive. ``(0, 0)`` is returned
        when the feature has no context tokens or when the answer is not
        fully contained in this feature's slice of the context.
    """
    context_tokens = [
        index for index, seq_id in enumerate(sequence_ids) if seq_id == 1
    ]
    if not context_tokens:
        return 0, 0

    first_ctx, last_ctx = context_tokens[0], context_tokens[-1]

    # The answer must lie entirely inside this window of the context;
    # otherwise the feature is labelled with position 0 (the first token).
    if offsets[first_ctx][0] > start_char or offsets[last_ctx][1] < end_char:
        return 0, 0

    # Walk forward to the last token that starts at or before the answer start.
    start_token = first_ctx
    while start_token <= last_ctx and offsets[start_token][0] <= start_char:
        start_token += 1
    start_token -= 1

    # Walk backward to the first token that ends at or after the answer end.
    end_token = last_ctx
    while end_token >= first_ctx and offsets[end_token][1] >= end_char:
        end_token -= 1
    end_token += 1

    return start_token, end_token


def preprocess_function(examples):
    """Tokenize a batch of question/context pairs and attach answer labels.

    Long contexts are split into overlapping windows (``max_length=384``,
    ``stride=128``), so the returned batch can hold more rows than
    ``examples``. Each window gets ``start_positions`` / ``end_positions``
    labels, which a question-answering model needs in order to compute a loss.

    Args:
        examples: Batch mapping with ``"question"``, ``"context"`` and
            ``"answers"`` columns. Each answers entry is expected to hold
            parallel ``"text"`` and ``"answer_start"`` lists; only the first
            answer is used as the label.

    Returns:
        The tokenizer output (including ``overflow_to_sample_mapping``) with
        ``start_positions`` and ``end_positions`` added.

    Note:
        Relies on the module-level ``tokenizer`` and on it being a "fast"
        tokenizer, since offset mappings and ``sequence_ids`` are required.
    """
    # Leading/trailing whitespace in a question only eats into the token budget.
    questions = [question.strip() for question in examples["question"]]

    inputs = tokenizer(
        questions,
        examples["context"],
        # Only the context (second sequence) may be cut into windows; the
        # question has to stay intact in every feature.
        truncation="only_second",
        max_length=384,
        stride=128,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )

    # Offsets are only needed to compute the labels, so they are not returned.
    offset_mapping = inputs.pop("offset_mapping")
    # One entry per produced feature, pointing back at its source example.
    sample_map = inputs["overflow_to_sample_mapping"]
    answers = examples["answers"]

    start_positions = []
    end_positions = []
    for feature_index, offsets in enumerate(offset_mapping):
        answer = answers[sample_map[feature_index]]
        if answer["answer_start"]:
            start_char = answer["answer_start"][0]
            end_char = start_char + len(answer["text"][0])
            start_token, end_token = locate_answer_span(
                offsets,
                inputs.sequence_ids(feature_index),
                start_char,
                end_char,
            )
        else:
            # No annotated answer: label the feature with position 0.
            start_token, end_token = 0, 0
        start_positions.append(start_token)
        end_positions.append(end_token)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs
23
+
24
import inspect

# Placeholder Hub repository ID, shared by the Trainer and the final uploads
# so the three uses cannot drift apart.
# Replace with your username and model repo name.
HUB_MODEL_ID = "username/qa_model_repo"

# Windowing long contexts makes the tokenizer return more rows than it was
# given, so the original columns must be dropped; otherwise the old and new
# columns would have different lengths and the batched map would fail.
tokenized_dataset = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

# Step 3: Train the Model
model = AutoModelForQuestionAnswering.from_pretrained("bert-base-uncased")

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`;
# use whichever keyword the installed version accepts.
eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=3e-5,
    per_device_train_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,  # Automatically push to the Hugging Face Hub
    hub_model_id=HUB_MODEL_ID,
    **{eval_strategy_kwarg: "epoch"},  # Evaluate at the end of every epoch
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
)

trainer.train()

# Step 4: Push the Model and Tokenizer to Hugging Face Hub
# The tokenizer is not handed to the Trainer, so it is uploaded explicitly here
# together with the final model weights.
model.push_to_hub(HUB_MODEL_ID)
tokenizer.push_to_hub(HUB_MODEL_ID)

print("Model and tokenizer pushed to Hugging Face Hub successfully!")