r1208 committed
Commit 1b52551 · verified · 1 Parent(s): bb6cfb1

Update app.py

Files changed (1): app.py +9 -0
app.py CHANGED
@@ -7,6 +7,12 @@ from peft import AutoPeftModelForCausalLM
 from transformers import AutoTokenizer
 import torch
 
+import torch
+print(f"Is CUDA available: {torch.cuda.is_available()}")
+# True
+print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
+# Tesla T4
+
 @spaces.GPU
 def main():
 
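Note: the added block re-imports torch (it is already imported a few lines up, so the second import is redundant but harmless) and prints CUDA diagnostics at import time; the # True and # Tesla T4 comments record the output observed on this Space. A device-agnostic sketch of the same probe (illustrative, not part of the commit):

import torch

# Resolve the device once and reuse it rather than hard-coding "cuda".
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if device.type == "cuda":
    # Reports the GPU model, e.g. "Tesla T4" on this Space's hardware.
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")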
 
@@ -25,6 +31,8 @@ def main():
     messages = [{"role": "user", "content": prompts}]
 
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
+    input_ids = input_ids.to("cuda")
+
     prompt_padded_len = len(input_ids[0])
 
     # Generate the translation
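Note: apply_chat_template returns CPU tensors, so they must be moved to the model's device before generation; this addition is the usual fix for a "tensors on different devices" runtime error. A minimal sketch of the pattern, assuming tokenizer and model are loaded as elsewhere in app.py (the prompt text and max_new_tokens value are illustrative):

# Assumes `tokenizer` and `model` are already loaded as in app.py.
messages = [{"role": "user", "content": "Hello"}]
input_ids = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
)
# model.device avoids hard-coding "cuda" and keeps the code CPU-compatible.
input_ids = input_ids.to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=64)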
@@ -63,6 +71,7 @@ def main():
 
 
     model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, use_auth_token=hf_token)
+    model = model.to("cuda")
     tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_token)
 
     tokenizer_with_prefix_space = AutoTokenizer.from_pretrained(model_id, add_prefix_space=True, use_auth_token=hf_token)
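Note: the model is loaded in bfloat16 and then moved to the GPU to match the input tensors above. A hedged alternative sketch: with accelerate installed, device_map can place the weights on the GPU at load time (model_id and hf_token below are placeholders, not values from this commit; recent transformers releases also prefer token= over the deprecated use_auth_token=):

import torch
from peft import AutoPeftModelForCausalLM

model_id = "org/adapter-name"  # placeholder; app.py defines the real id elsewhere
hf_token = "hf_..."            # placeholder token

# Loading with device_map="cuda" is equivalent to from_pretrained(...)
# followed by model.to("cuda"), and can avoid a CPU-to-GPU weight copy.
model = AutoPeftModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="cuda",
    token=hf_token,
)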
 