Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
|
|
4 |
import spaces
|
5 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
6 |
import subprocess
|
7 |
-
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
8 |
|
9 |
BANNER_HTML = """
|
10 |
<p align="center">
|
@@ -37,7 +37,7 @@ def load_model(version):
|
|
37 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", attn_implementation="flash_attention_2")
|
38 |
return f"Model {model_name} loaded."
|
39 |
|
40 |
-
@spaces.GPU(duration=
|
41 |
def stream_chat(message: str, history: list, system_prompt: str, model_version: str, temperature: float, max_new_tokens: int):
|
42 |
conversation = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
|
43 |
for prompt, answer in history:
|
|
|
4 |
import spaces
|
5 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
6 |
import subprocess
|
7 |
+
#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
8 |
|
9 |
BANNER_HTML = """
|
10 |
<p align="center">
|
|
|
37 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", attn_implementation="flash_attention_2")
|
38 |
return f"Model {model_name} loaded."
|
39 |
|
40 |
+
@spaces.GPU(duration=60)
|
41 |
def stream_chat(message: str, history: list, system_prompt: str, model_version: str, temperature: float, max_new_tokens: int):
|
42 |
conversation = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
|
43 |
for prompt, answer in history:
|