Spaces:
Sleeping
Sleeping
update to gpu
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ from model.tokenizer_utils import generate_custom_mask, prepare_tokenizer
|
|
12 |
|
13 |
access_token = os.getenv("HF_TOKEN")
|
14 |
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
15 |
-
device = 'cpu'
|
16 |
|
17 |
model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype= torch.bfloat16, token=access_token)
|
18 |
model = model.to(device)
|
@@ -48,7 +48,7 @@ demo = gr.ChatInterface(
|
|
48 |
respond,
|
49 |
additional_inputs=[
|
50 |
gr.Slider(minimum=1, maximum=128, value=32, step=1, label="Max new tokens"),
|
51 |
-
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
52 |
],
|
53 |
)
|
54 |
|
|
|
12 |
|
13 |
access_token = os.getenv("HF_TOKEN")
|
14 |
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
15 |
+
device = 'cuda'
|
16 |
|
17 |
model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype= torch.bfloat16, token=access_token)
|
18 |
model = model.to(device)
|
|
|
48 |
respond,
|
49 |
additional_inputs=[
|
50 |
gr.Slider(minimum=1, maximum=128, value=32, step=1, label="Max new tokens"),
|
51 |
+
gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
|
52 |
],
|
53 |
)
|
54 |
|