gufett0 commited on
Commit
8c678cf
·
1 Parent(s): ea01567

switched to zerogpu

Browse files
Files changed (2) hide show
  1. backend.py +4 -3
  2. requirements.txt +2 -1
backend.py CHANGED
@@ -10,6 +10,7 @@ from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader,
10
  from llama_index.core.node_parser import SentenceSplitter
11
  from huggingface_hub import hf_hub_download
12
  from llama_cpp import Llama
 
13
 
14
 
15
  huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
@@ -24,8 +25,8 @@ hf_hub_download(
24
 
25
  llm = Llama(
26
  model_path=f"models/2b_it_v2.gguf",
27
- #flash_attn=True,
28
- #_gpu_layers=81,
29
  n_batch=1024,
30
  n_ctx=8192,
31
  )
@@ -61,7 +62,7 @@ nodes = parser.get_nodes_from_documents(documents)
61
  # BUILD A VECTOR STORE
62
  index = VectorStoreIndex(nodes)
63
 
64
-
65
  def handle_query(query_str, chathistory):
66
 
67
  qa_prompt_str = (
 
10
  from llama_index.core.node_parser import SentenceSplitter
11
  from huggingface_hub import hf_hub_download
12
  from llama_cpp import Llama
13
+ import spaces
14
 
15
 
16
  huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 
25
 
26
  llm = Llama(
27
  model_path=f"models/2b_it_v2.gguf",
28
+ flash_attn=True,
29
+ n_gpu_layers=81,
30
  n_batch=1024,
31
  n_ctx=8192,
32
  )
 
62
  # BUILD A VECTOR STORE
63
  index = VectorStoreIndex(nodes)
64
 
65
+ @spaces.GPU(duration=120)
66
  def handle_query(query_str, chathistory):
67
 
68
  qa_prompt_str = (
requirements.txt CHANGED
@@ -9,4 +9,5 @@ llama-index-readers-file
9
  gradio
10
  transformers
11
  llama-cpp-agent>=0.2.25
12
- setuptools
 
 
9
  gradio
10
  transformers
11
  llama-cpp-agent>=0.2.25
12
+ setuptools
13
+ spaces