Tijmen2 committed on
Commit
d8c999b
·
verified ·
1 Parent(s): da4a62d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -7
app.py CHANGED
@@ -9,13 +9,18 @@ model_path = hf_hub_download(
9
  filename="AstroSage-8B-Q8_0.gguf"
10
  )
11
 
12
- llm = Llama(
13
- model_path=model_path,
14
- n_ctx=2048,
15
- chat_format="llama-3",
16
- n_gpu_layers=-1, # ensure all layers are on GPU
17
- split_mode=0,
18
- )
 
 
 
 
 
19
 
20
  # Placeholder responses for when context is empty
21
  GREETING_MESSAGES = [
 
9
  filename="AstroSage-8B-Q8_0.gguf"
10
  )
11
 
12
+ @space.GPU
13
+ def load_llm():
14
+ llm = Llama(
15
+ model_path=model_path,
16
+ n_ctx=2048,
17
+ chat_format="llama-3",
18
+ n_gpu_layers=-1, # ensure all layers are on GPU
19
+ split_mode=0,
20
+ )
21
+ return llm
22
+
23
+ llm = load_llm()
24
 
25
  # Placeholder responses for when context is empty
26
  GREETING_MESSAGES = [