chaseharmon committed on
Commit
62ed4f6
·
1 Parent(s): 50e2273
Files changed (1) hide show
  1. app.py +2 -0
app.py CHANGED
@@ -17,6 +17,7 @@ def load_model():
17
  model = AutoModelForCausalLM.from_pretrained(
18
  base_model_name,
19
  device_map='auto',
 
20
  )
21
  model.config.use_cache = False
22
  model.config.pretraining_tp = 1
@@ -49,6 +50,7 @@ display = "Ask Rap-Mistral Something"
49
  st.write(display)
50
  question = st.chat_input("Write a verse in the style of Lupe Fiasco")
51
  if question:
 
52
  prompt = build_prompt(question)
53
  inputs = tokenizer(prompt, return_tensors="pt")
54
  model_inputs = inputs.to('cuda')
 
17
  model = AutoModelForCausalLM.from_pretrained(
18
  base_model_name,
19
  device_map='auto',
20
+ quantization_config=nf4_config,
21
  )
22
  model.config.use_cache = False
23
  model.config.pretraining_tp = 1
 
50
  st.write(display)
51
  question = st.chat_input("Write a verse in the style of Lupe Fiasco")
52
  if question:
53
+ display = "Loading..."
54
  prompt = build_prompt(question)
55
  inputs = tokenizer(prompt, return_tensors="pt")
56
  model_inputs = inputs.to('cuda')