WilliamGazeley committed
Commit 7067b68 · 1 Parent(s): 341985c

Move flash attention install to runtime

Files changed (2):
  1. app.py +3 -0
  2. requirements.txt +0 -1
app.py CHANGED
@@ -6,6 +6,9 @@ from utils import get_assistant_message
 from functioncall import ModelInference
 from prompter import PromptManager
 
+# HACK
+import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 @st.cache_resource(show_spinner="Loading model..")
 def init_llm():
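Note on the hunk above: passing env={...} to subprocess.run replaces the whole environment rather than extending it, so pip runs without the caller's PATH and other variables. A minimal, more defensive variant of the same runtime install (an illustrative sketch, not part of this commit) merges os.environ, invokes pip through the current interpreter, and skips the install when flash-attn is already importable:

import os
import subprocess
import sys

try:
    import flash_attn  # noqa: F401 -- already installed, nothing to do
except ImportError:
    # Merge the parent environment so pip keeps PATH, HOME, etc.;
    # the skip flag avoids compiling CUDA kernels at install time.
    env = {**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"}
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
        env=env,
        check=True,  # fail loudly if the install does not succeed
    )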
requirements.txt CHANGED
@@ -1,4 +1,3 @@
-flash-attn==2.5.5
 ninja==1.11.1.1
 numpy==1.26.4
 orjson==3.10.3