1inkusFace committed
Commit 417df1c · verified · 1 Parent(s): 6d728a0

Update app.py

Files changed (1)
  1. app.py +16 -3
app.py CHANGED
@@ -2,6 +2,20 @@ import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import gradio as gr
+import os
+
+torch.backends.cuda.matmul.allow_tf32 = False
+torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
+torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
+torch.backends.cudnn.allow_tf32 = False
+torch.backends.cudnn.deterministic = False
+torch.backends.cudnn.benchmark = False
+#torch.backends.cuda.preferred_blas_library="cublas"
+# torch.backends.cuda.preferred_linalg_library="cusolver"
+
+torch.set_float32_matmul_precision("highest")
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+os.environ["SAFETENSORS_FAST_GPU"] = "1"
 
 model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"
 
@@ -11,7 +25,7 @@ model = AutoModelForCausalLM.from_pretrained(
     torch_dtype="auto",
     # device_map="auto",
     trust_remote_code=True # Add this line for Qwen models
-).to('cuda')
+).to('cuda',torch.bfloat16)
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) # Add this line for Qwen models
 
@@ -32,9 +46,8 @@ def generate_code(prompt):
         **model_inputs,
         max_new_tokens = 1024,
         min_new_tokens = 256,
-        #low_memory = True,
+        low_memory = False,
         do_sample = True,
-        #token_healing = True,
         #guidance_scale = 3.8,
     )
     generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
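
For readers skimming the new flags: disabling TF32 and the reduced-precision reduction paths pins CUDA matmuls to full IEEE float32 accumulation, and set_float32_matmul_precision("highest") is the matching global switch. A minimal sketch (stock PyTorch only, not part of the commit) showing the two knobs agree:

import torch

# TF32 off: float32 matmuls on Ampere+ GPUs use full-precision multiplication.
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False
# "highest" is the global equivalent of the per-backend flags above.
torch.set_float32_matmul_precision("highest")
print(torch.get_float32_matmul_precision())  # "highest"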
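One environment-variable subtlety worth flagging (an aside, not from the commit): os.putenv writes to the C-level environment without updating os.environ, and huggingface_hub reads os.environ, so only the assignment form reliably enables hf_transfer in-process:

import os

os.putenv("HF_HUB_ENABLE_HF_TRANSFER", "1")         # bypasses os.environ
print(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER"))  # None
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"       # updates both views
print(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER"))  # "1"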
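On the .to('cuda',torch.bfloat16) change: casting after from_pretrained first materializes the checkpoint at its stored dtype. A sketch of the alternative (same transformers API; an assumption about intent, not the commit itself) that casts while loading:

import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-Coder-14B-Instruct",
    torch_dtype=torch.bfloat16,  # cast during load instead of after
    trust_remote_code=True,
).to("cuda")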
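The hunk shows only the middle of generate_code; for orientation, a minimal sketch of the usual Qwen chat-template flow these parameters plug into (the function body outside the visible lines is an assumption):

def generate_code(prompt):
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=1024,
        min_new_tokens=256,
        do_sample=True,
    )
    # Strip the prompt tokens, keeping only the newly generated continuation.
    generated_ids = [out[len(inp):] for inp, out in zip(model_inputs.input_ids, generated_ids)]
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]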