Spaces:
Sleeping
Sleeping
switched to zerogpu
Browse files
- backend.py +4 -3
- requirements.txt +2 -1
backend.py
CHANGED
@@ -10,6 +10,7 @@ from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader,
|
|
10 |
from llama_index.core.node_parser import SentenceSplitter
|
11 |
from huggingface_hub import hf_hub_download
|
12 |
from llama_cpp import Llama
|
|
|
13 |
|
14 |
|
15 |
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
|
@@ -24,8 +25,8 @@ hf_hub_download(
|
|
24 |
|
25 |
llm = Llama(
|
26 |
model_path=f"models/2b_it_v2.gguf",
|
27 |
-
|
28 |
-
|
29 |
n_batch=1024,
|
30 |
n_ctx=8192,
|
31 |
)
|
@@ -61,7 +62,7 @@ nodes = parser.get_nodes_from_documents(documents)
|
|
61 |
# BUILD A VECTOR STORE
|
62 |
index = VectorStoreIndex(nodes)
|
63 |
|
64 |
-
|
65 |
def handle_query(query_str, chathistory):
|
66 |
|
67 |
qa_prompt_str = (
|
|
|
10 |
from llama_index.core.node_parser import SentenceSplitter
|
11 |
from huggingface_hub import hf_hub_download
|
12 |
from llama_cpp import Llama
|
13 |
+
import spaces
|
14 |
|
15 |
|
16 |
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
|
|
|
25 |
|
26 |
llm = Llama(
|
27 |
model_path=f"models/2b_it_v2.gguf",
|
28 |
+
flash_attn=True,
|
29 |
+
n_gpu_layers=81,
|
30 |
n_batch=1024,
|
31 |
n_ctx=8192,
|
32 |
)
|
|
|
62 |
# BUILD A VECTOR STORE
|
63 |
index = VectorStoreIndex(nodes)
|
64 |
|
65 |
+
@spaces.GPU(duration=120)
|
66 |
def handle_query(query_str, chathistory):
|
67 |
|
68 |
qa_prompt_str = (
|
requirements.txt
CHANGED
@@ -9,4 +9,5 @@ llama-index-readers-file
|
|
9 |
gradio
|
10 |
transformers
|
11 |
llama-cpp-agent>=0.2.25
|
12 |
-
setuptools
|
|
|
|
9 |
gradio
|
10 |
transformers
|
11 |
llama-cpp-agent>=0.2.25
|
12 |
+
setuptools
|
13 |
+
spaces
|