drmasad committed on
Commit
c69da53
·
verified ·
1 Parent(s): 7d9d9be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -13
app.py CHANGED
@@ -46,12 +46,7 @@ def load_model(selected_model_name):
46
  st.info("Loading the model, please wait...")
47
  model_name = model_links[selected_model_name]
48
 
49
- # Set default device for all tensor operations
50
- torch.set_default_tensor_type('torch.FloatTensor')
51
- if torch.cuda.is_available():
52
- torch.set_default_tensor_type('torch.cuda.FloatTensor')
53
-
54
- # Define configuration for loading the model
55
  bnb_config = BitsAndBytesConfig(
56
  load_in_4bit=True,
57
  bnb_4bit_quant_type="nf4",
@@ -60,19 +55,21 @@ def load_model(selected_model_name):
60
  llm_int8_enable_fp32_cpu_offload=True,
61
  )
62
 
63
- # Load the model
 
 
 
 
 
 
 
64
  model = AutoModelForCausalLM.from_pretrained(
65
  model_name,
66
  quantization_config=bnb_config,
 
67
  trust_remote_code=True,
68
  )
69
 
70
- # Explicitly move the model to the correct device
71
- if torch.cuda.is_available():
72
- model.cuda() # Move model to GPU
73
- else:
74
- model.cpu() # Move model to CPU
75
-
76
  model.config.use_cache = False
77
  model = prepare_model_for_kbit_training(model)
78
 
@@ -95,6 +92,7 @@ def load_model(selected_model_name):
95
 
96
  return model, tokenizer
97
 
 
98
  # Load model and tokenizer
99
  model, tokenizer = load_model(selected_model)
100
 
 
46
  st.info("Loading the model, please wait...")
47
  model_name = model_links[selected_model_name]
48
 
49
+ # Configure the quantization and device settings
 
 
 
 
 
50
  bnb_config = BitsAndBytesConfig(
51
  load_in_4bit=True,
52
  bnb_4bit_quant_type="nf4",
 
55
  llm_int8_enable_fp32_cpu_offload=True,
56
  )
57
 
58
+ # Device map to specify where each component should reside
59
+ device_map = {
60
+ 'encoder': 'cuda', # or 'cpu' if reducing GPU load is crucial
61
+ 'decoder': 'cpu',
62
+ 'embed_tokens': 'cpu'
63
+ }
64
+
65
+ # Load the model with the specified device map and quantization config
66
  model = AutoModelForCausalLM.from_pretrained(
67
  model_name,
68
  quantization_config=bnb_config,
69
+ device_map=device_map,
70
  trust_remote_code=True,
71
  )
72
 
 
 
 
 
 
 
73
  model.config.use_cache = False
74
  model = prepare_model_for_kbit_training(model)
75
 
 
92
 
93
  return model, tokenizer
94
 
95
+
96
  # Load model and tokenizer
97
  model, tokenizer = load_model(selected_model)
98