rphrp1985 committed on
Commit
3ae4a47
·
verified ·
1 Parent(s): 5cfe646

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -39,6 +39,8 @@ tokenizer = AutoTokenizer.from_pretrained(
39
  model_id
40
  , token= token,)
41
 
 
 
42
  with init_empty_weights():
43
  model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
44
  # torch_dtype= torch.uint8,
@@ -47,7 +49,7 @@ with init_empty_weights():
47
  attn_implementation="flash_attention_2",
48
  # low_cpu_mem_usage=True,
49
  # llm_int8_enable_fp32_cpu_offload=True,
50
- # device_map="cuda",
51
 
52
  )
53
 
@@ -58,8 +60,8 @@ with init_empty_weights():
58
  device_map = infer_auto_device_map(model, max_memory={0: "80GB", 1: "80GB", "cpu": "65GB"})
59
 
60
  # Load the model with the inferred device map
61
- model = load_checkpoint_and_dispatch(model, model_id, device_map=device_map, no_split_module_classes=["GPTJBlock"])
62
- model.half()
63
 
64
 
65
 
 
39
  model_id
40
  , token= token,)
41
 
42
+ device_map = infer_auto_device_map(model, max_memory={0: "80GB", "cpu": "65GB"})
43
+
44
  with init_empty_weights():
45
  model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
46
  # torch_dtype= torch.uint8,
 
49
  attn_implementation="flash_attention_2",
50
  # low_cpu_mem_usage=True,
51
  # llm_int8_enable_fp32_cpu_offload=True,
52
+ device_map=device_map,
53
 
54
  )
55
 
 
60
  device_map = infer_auto_device_map(model, max_memory={0: "80GB", 1: "80GB", "cpu": "65GB"})
61
 
62
  # Load the model with the inferred device map
63
+ # model = load_checkpoint_and_dispatch(model, model_id, device_map=device_map, no_split_module_classes=["GPTJBlock"])
64
+ # model.half()
65
 
66
 
67