Saad381 committed on
Commit
d2a3481
·
verified ·
1 Parent(s): e1ecdf3

Delete train.py

Browse files
Files changed (1) hide show
  1. train.py +0 -40
train.py DELETED
@@ -1,40 +0,0 @@
1
- from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
2
- from datasets import load_dataset
3
-
4
- # Load the model and tokenizer
5
- model = AutoModelForCausalLM.from_pretrained("Saad381/SpectraGen")
6
- tokenizer = AutoTokenizer.from_pretrained("Saad381/SpectraGen")
7
-
8
- # Load your dataset (CSV file assumed here)
9
- dataset = load_dataset('csv', data_files='dataset.csv')
10
-
11
- # Tokenize your dataset
12
- def tokenize_function(examples):
13
- return tokenizer(examples["text"], padding="max_length", truncation=True)
14
-
15
- tokenized_datasets = dataset.map(tokenize_function, batched=True)
16
-
17
- # Define training arguments
18
- training_args = TrainingArguments(
19
- output_dir='./results', # output directory
20
- evaluation_strategy="epoch", # evaluate at end of each epoch
21
- per_device_train_batch_size=8, # batch size
22
- num_train_epochs=3, # number of training epochs
23
- save_steps=10_000, # steps to save checkpoint
24
- save_total_limit=2, # limit the total amount of checkpoints
25
- )
26
-
27
- # Initialize the Trainer
28
- trainer = Trainer(
29
- model=model,
30
- args=training_args,
31
- train_dataset=tokenized_datasets["train"],
32
- eval_dataset=tokenized_datasets["test"]
33
- )
34
-
35
- # Train the model
36
- trainer.train()
37
-
38
- # Save the model
39
- model.save_pretrained('./trained_model')
40
- tokenizer.save_pretrained('./trained_model')