dushuai112233 committed
Commit e03b1b6 · verified · 1 parent: 0edd576

Update app.py

Files changed (1)
  1. app.py +4 -11
app.py CHANGED
@@ -1,17 +1,14 @@
-from accelerate import Accelerator
 from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
 from peft import LoraConfig, get_peft_model, TaskType
 from datasets import load_dataset
+from torch.utils.tensorboard import SummaryWriter
+import os
 import torch
-
 def main():
-    # Initialize the Accelerator
-    accelerator = Accelerator()
-
     # Base model location
     model_name = "dushuai112233/Qwen2-1.5B-Instruct"
     # Device
-    device = accelerator.device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
 
     # Load the tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
@@ -39,7 +36,6 @@ def main():
     def tokenize_function(examples):
         return tokenizer(examples['question'], padding='max_length', truncation=True, max_length=128)
 
-    # Tokenize the training and validation sets
     train_dataset = train_dataset.map(tokenize_function, batched=True)
     val_dataset = val_dataset.map(tokenize_function, batched=True)
 
@@ -56,9 +52,6 @@ def main():
         save_total_limit=2,  # maximum number of saved checkpoints
     )
 
-    # Move the model to the device
-    model.to(device)
-
     # Define the Trainer
     trainer = Trainer(
         model=model,  # the model to train
@@ -75,4 +68,4 @@ def main():
     model.save_pretrained('./output')
 
 if __name__ == '__main__':
-    main()
+    main()
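The new `from torch.utils.tensorboard import SummaryWriter` and `import os` lines land in the import block, but none of the hunks shown here contain their call sites. As a minimal sketch only, here is one common way to wire TensorBoard into this kind of Trainer run; the log directory name and logging cadence below are assumptions, not values taken from this commit:

import os
from torch.utils.tensorboard import SummaryWriter
from transformers import TrainingArguments

# Hypothetical log directory; the commit does not show the real one.
log_dir = os.path.join("./output", "runs")

# Trainer can write TensorBoard event files itself.
training_args = TrainingArguments(
    output_dir="./output",
    logging_dir=log_dir,      # where event files go
    report_to="tensorboard",  # enable the TensorBoard callback
    logging_steps=10,         # log every 10 optimizer steps
)

# A manual writer can share the same directory for extra custom scalars.
writer = SummaryWriter(log_dir=log_dir)
writer.add_scalar("custom/example_metric", 0.0, global_step=0)
writer.close()

With this in place, `tensorboard --logdir ./output/runs` picks up both the Trainer's built-in metrics and any manually logged scalars.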
 
 
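The other substantive change is dropping `accelerate`: the device now comes from a plain CUDA check, and the explicit `model.to(device)` before training is deleted, which is safe because Trainer moves the model to the available device on its own. A minimal sketch of the resulting setup, with hypothetical LoRA hyperparameters since the actual `LoraConfig` values sit outside the hunks shown:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, TaskType

model_name = "dushuai112233/Qwen2-1.5B-Instruct"

# Same device selection the commit introduces.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

# Hypothetical LoRA settings; the real ones are not visible in this diff.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,  # causal language modeling
    r=8,                           # low-rank update dimension
    lora_alpha=32,                 # scaling factor
    lora_dropout=0.1,
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# No model.to(device) is needed before handing the model to Trainer;
# Trainer performs device placement itself.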