WilliamGazeley committed · Commit 7067b68 · Parent(s): 341985c

Move flash attention install to runtime

Files changed:
- app.py: +3 -0
- requirements.txt: +0 -1
app.py
CHANGED
@@ -6,6 +6,9 @@ from utils import get_assistant_message
 from functioncall import ModelInference
 from prompter import PromptManager
 
+# HACK
+import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 @st.cache_resource(show_spinner="Loading model..")
 def init_llm():
requirements.txt
CHANGED
@@ -1,4 +1,3 @@
-flash-attn==2.5.5
 ninja==1.11.1.1
 numpy==1.26.4
 orjson==3.10.3
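Note on the new lines in app.py: setting FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE tells flash-attn's setup to skip compiling the CUDA kernels at install time, which is why the pin could be dropped from requirements.txt and installed at runtime instead. Below is a minimal sketch of the same runtime install; the merged os.environ and check=True are additions for illustration here, not part of this commit, and assume the Space image already provides pip on PATH.

# Sketch only (not in the commit): same runtime install, but merging
# os.environ so PATH and CUDA-related variables reach the child shell,
# and failing loudly if pip exits with a non-zero status.
import os
import subprocess

subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
    check=True,
)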