Update app.py
app.py CHANGED
@@ -7,6 +7,12 @@ from peft import AutoPeftModelForCausalLM
 from transformers import AutoTokenizer
 import torch
 
+import torch
+print(f"Is CUDA available: {torch.cuda.is_available()}")
+# True
+print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
+# Tesla T4
+
 @spaces.GPU
 def main():
 
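The added block is a module-level sanity check: it prints whether CUDA is visible and which device backs the Space before the @spaces.GPU-decorated entry point runs. A minimal sketch of the same check, with a CPU fallback guard that is an assumption on my part and not part of the commit:

import torch

# Hedged sketch: guard the device query so the check also runs on CPU-only hosts.
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
else:
    print("CUDA not available; tensors will stay on the CPU.")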
@@ -25,6 +31,8 @@ def main():
     messages = [{"role": "user", "content": prompts}]
 
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
+    input_ids = input_ids.to("cuda")
+
     prompt_padded_len = len(input_ids[0])
 
     # Generate the translation
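This hunk moves the tokenized prompt onto the GPU so it lives on the same device as the model; a device mismatch would raise a runtime error during generation. A device-agnostic variant (an assumption, not what the commit does) avoids hard-coding "cuda":

# Hedged sketch: derive the target device instead of hard-coding "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"
input_ids = input_ids.to(device)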
@@ -63,6 +71,7 @@ def main():
 
 
     model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, use_auth_token=hf_token)
+    model = model.to("cuda")
     tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
 
     tokenizer_with_prefix_space = AutoTokenizer.from_pretrained(model_id, add_prefix_space=True, use_auth_token=hf_token)
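The final hunk copies the loaded PEFT model's weights onto the GPU. An alternative sketch (an assumption, not part of the commit) lets transformers place the weights at load time via device_map="auto", which skips the extra CPU-to-GPU copy; it requires the accelerate package, and note that use_auth_token is deprecated in recent transformers releases in favor of token:

# Hedged sketch: load and place weights in one step (requires `accelerate`).
model = AutoPeftModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",   # dispatch weights to the available GPU(s), else CPU
    token=hf_token,      # newer replacement for the deprecated use_auth_token
)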