Spaces:
Runtime error
Runtime error
leonardlin
committed on
Commit
•
bd17394
1
Parent(s):
39c14f3
add flash attention, reorder examples
Browse files
app.py
CHANGED
@@ -18,8 +18,8 @@ description = "Test out Shisa 7B in either English or Japanese. If you aren't ge
|
|
18 |
placeholder = "Type Here / ここに入力してください"
|
19 |
examples = [
|
20 |
["What are the best slices of pizza in New York City?"],
|
21 |
-
['How do I program a simple "hello world" in Python?'],
|
22 |
["東京でおすすめのラーメン屋ってどこ?"],
|
|
|
23 |
["Pythonでシンプルな「ハローワールド」をプログラムするにはどうすればいいですか?"],
|
24 |
]
|
25 |
|
@@ -40,6 +40,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
40 |
bnb_4bit_use_double_quant=True,
|
41 |
bnb_4bit_compute_dtype=torch.bfloat16
|
42 |
),
|
|
|
43 |
)
|
44 |
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
45 |
|
|
|
18 |
placeholder = "Type Here / ここに入力してください"
|
19 |
examples = [
|
20 |
["What are the best slices of pizza in New York City?"],
|
|
|
21 |
["東京でおすすめのラーメン屋ってどこ?"],
|
22 |
+
['How do I program a simple "hello world" in Python?'],
|
23 |
["Pythonでシンプルな「ハローワールド」をプログラムするにはどうすればいいですか?"],
|
24 |
]
|
25 |
|
|
|
40 |
bnb_4bit_use_double_quant=True,
|
41 |
bnb_4bit_compute_dtype=torch.bfloat16
|
42 |
),
|
43 |
+
use_flash_attention_2=True
|
44 |
)
|
45 |
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
46 |
|