Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -42,14 +42,16 @@ st.sidebar.image("https://www.hmgaihub.com/untitled.png")
|
|
42 |
st.sidebar.markdown("*Generated content may be inaccurate or false.*")
|
43 |
st.sidebar.markdown("*This is an under development project.*")
|
44 |
|
45 |
-
# Function to load model
|
46 |
def load_model(selected_model_name):
|
47 |
st.info("Loading the model, please wait...")
|
48 |
model_name = model_links[selected_model_name]
|
49 |
|
50 |
-
# Set
|
51 |
-
|
|
|
|
|
52 |
|
|
|
53 |
bnb_config = BitsAndBytesConfig(
|
54 |
load_in_4bit=True,
|
55 |
bnb_4bit_quant_type="nf4",
|
@@ -58,18 +60,18 @@ def load_model(selected_model_name):
|
|
58 |
llm_int8_enable_fp32_cpu_offload=True,
|
59 |
)
|
60 |
|
61 |
-
|
62 |
-
'encoder.layer.0': 'cuda', # Keep specific parts on GPU
|
63 |
-
'decoder': 'cpu', # Offload others to CPU
|
64 |
-
}
|
65 |
-
|
66 |
model = AutoModelForCausalLM.from_pretrained(
|
67 |
model_name,
|
68 |
quantization_config=bnb_config,
|
69 |
-
device_map=device_map,
|
70 |
trust_remote_code=True,
|
71 |
)
|
72 |
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
model.config.use_cache = False
|
75 |
model = prepare_model_for_kbit_training(model)
|
@@ -88,6 +90,7 @@ def load_model(selected_model_name):
|
|
88 |
tokenizer = AutoTokenizer.from_pretrained(
|
89 |
"mistralai/Mistral-7B-Instruct-v0.2", trust_remote_code=True
|
90 |
)
|
|
|
91 |
st.success("Model is ready. Now we are ready!")
|
92 |
|
93 |
return model, tokenizer
|
|
|
42 |
st.sidebar.markdown("*Generated content may be inaccurate or false.*")
|
43 |
st.sidebar.markdown("*This is an under development project.*")
|
44 |
|
|
|
45 |
def load_model(selected_model_name):
|
46 |
st.info("Loading the model, please wait...")
|
47 |
model_name = model_links[selected_model_name]
|
48 |
|
49 |
+
# Set the default tensor type: CPU float tensors, or CUDA float tensors when a GPU is available
|
50 |
+
torch.set_default_tensor_type('torch.FloatTensor')
|
51 |
+
if torch.cuda.is_available():
|
52 |
+
torch.set_default_tensor_type('torch.cuda.FloatTensor')
|
53 |
|
54 |
+
# Define the 4-bit (NF4) bitsandbytes quantization configuration used to load the model
|
55 |
bnb_config = BitsAndBytesConfig(
|
56 |
load_in_4bit=True,
|
57 |
bnb_4bit_quant_type="nf4",
|
|
|
60 |
llm_int8_enable_fp32_cpu_offload=True,
|
61 |
)
|
62 |
|
63 |
+
# Load the model
|
|
|
|
|
|
|
|
|
64 |
model = AutoModelForCausalLM.from_pretrained(
|
65 |
model_name,
|
66 |
quantization_config=bnb_config,
|
|
|
67 |
trust_remote_code=True,
|
68 |
)
|
69 |
|
70 |
+
# Explicitly move the model to the correct device
|
71 |
+
if torch.cuda.is_available():
|
72 |
+
model.cuda() # Move model to GPU
|
73 |
+
else:
|
74 |
+
model.cpu() # Move model to CPU
|
75 |
|
76 |
model.config.use_cache = False
|
77 |
model = prepare_model_for_kbit_training(model)
|
|
|
90 |
tokenizer = AutoTokenizer.from_pretrained(
|
91 |
"mistralai/Mistral-7B-Instruct-v0.2", trust_remote_code=True
|
92 |
)
|
93 |
+
|
94 |
st.success("Model is ready. Now we are ready!")
|
95 |
|
96 |
return model, tokenizer
|