Commit
·
62ed4f6
1
Parent(s):
50e2273
quant
Browse files
app.py
CHANGED
@@ -17,6 +17,7 @@ def load_model():
     model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
         device_map='auto',
+        quantization_config=nf4_config,
     )
     model.config.use_cache = False
     model.config.pretraining_tp = 1
@@ -49,6 +50,7 @@ display = "Ask Rap-Mistral Something"
 st.write(display)
 question = st.chat_input("Write a verse in the style of Lupe Fiasco")
 if question:
+    display = "Loading..."
     prompt = build_prompt(question)
     inputs = tokenizer(prompt, return_tensors="pt")
     model_inputs = inputs.to('cuda')