WilliamGazeley committed · Commit 7067b68 · Parent(s): 341985c

Move flash attention install to runtime

Files changed:
- app.py: +3 -0
- requirements.txt: +0 -1
app.py
CHANGED
@@ -6,6 +6,9 @@ from utils import get_assistant_message
 from functioncall import ModelInference
 from prompter import PromptManager
 
+# HACK
+import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 @st.cache_resource(show_spinner="Loading model..")
 def init_llm():
requirements.txt
CHANGED
@@ -1,4 +1,3 @@
-flash-attn==2.5.5
 ninja==1.11.1.1
 numpy==1.26.4
 orjson==3.10.3
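Note on the new lines in app.py: setting FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE tells flash-attn's setup to skip compiling the CUDA kernels at install time, which is why the pin could be dropped from requirements.txt and installed at runtime instead. Below is a minimal sketch of the same runtime install; the merged os.environ and check=True are additions for illustration here, not part of this commit, and assume the Space image already provides pip on PATH.

# Sketch only (not in the commit): same runtime install, but merging
# os.environ so PATH and CUDA-related variables reach the child shell,
# and failing loudly if pip exits with a non-zero status.
import os
import subprocess

subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
    check=True,
)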