Spaces:
Sleeping
Sleeping
switched to zerogpu
Browse files
- backend.py +4 -3
- requirements.txt +2 -1
backend.py
CHANGED
@@ -10,6 +10,7 @@ from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader,
|
|
10 |
from llama_index.core.node_parser import SentenceSplitter
|
11 |
from huggingface_hub import hf_hub_download
|
12 |
from llama_cpp import Llama
|
|
|
13 |
|
14 |
|
15 |
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
|
@@ -24,8 +25,8 @@ hf_hub_download(
|
|
24 |
|
25 |
llm = Llama(
|
26 |
model_path=f"models/2b_it_v2.gguf",
|
27 |
-
|
28 |
-
|
29 |
n_batch=1024,
|
30 |
n_ctx=8192,
|
31 |
)
|
@@ -61,7 +62,7 @@ nodes = parser.get_nodes_from_documents(documents)
|
|
61 |
# BUILD A VECTOR STORE
|
62 |
index = VectorStoreIndex(nodes)
|
63 |
|
64 |
-
|
65 |
def handle_query(query_str, chathistory):
|
66 |
|
67 |
qa_prompt_str = (
|
|
|
10 |
from llama_index.core.node_parser import SentenceSplitter
|
11 |
from huggingface_hub import hf_hub_download
|
12 |
from llama_cpp import Llama
|
13 |
+
import spaces
|
14 |
|
15 |
|
16 |
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
|
|
|
25 |
|
26 |
llm = Llama(
|
27 |
model_path=f"models/2b_it_v2.gguf",
|
28 |
+
flash_attn=True,
|
29 |
+
n_gpu_layers=81,
|
30 |
n_batch=1024,
|
31 |
n_ctx=8192,
|
32 |
)
|
|
|
62 |
# BUILD A VECTOR STORE
|
63 |
index = VectorStoreIndex(nodes)
|
64 |
|
65 |
+
@spaces.GPU(duration=120)
|
66 |
def handle_query(query_str, chathistory):
|
67 |
|
68 |
qa_prompt_str = (
|
requirements.txt
CHANGED
@@ -9,4 +9,5 @@ llama-index-readers-file
|
|
9 |
gradio
|
10 |
transformers
|
11 |
llama-cpp-agent>=0.2.25
|
12 |
-
setuptools
|
|
|
|
9 |
gradio
|
10 |
transformers
|
11 |
llama-cpp-agent>=0.2.25
|
12 |
+
setuptools
|
13 |
+
spaces
|