Update app.py
Browse files
app.py
CHANGED
@@ -49,7 +49,7 @@ if USE_FLASH_ATTENTION:
|
|
49 |
attn_implementation="flash_attention_2"
|
50 |
|
51 |
model = AutoModelForCausalLM.from_pretrained(
|
52 |
-
|
53 |
quantization_config=quantization_config,
|
54 |
attn_implementation=attn_implementation,
|
55 |
torch_dtype=torch.bfloat16,
|
|
|
49 |
attn_implementation="flash_attention_2"
|
50 |
|
51 |
model = AutoModelForCausalLM.from_pretrained(
|
52 |
+
MODEL_ID,
|
53 |
quantization_config=quantization_config,
|
54 |
attn_implementation=attn_implementation,
|
55 |
torch_dtype=torch.bfloat16,
|