abcd66666 committed on
Commit a390b9a · verified · 1 Parent(s): 2ba29c1

Upload app.py

Files changed (1)
  1. app.py +19 -9
app.py CHANGED
@@ -1,15 +1,24 @@
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  import torch

- # Load the DeepSeek-Coder-6.7B model
- model_name = "deepseek/DeepSeek-Coder-6.7B"
- tokenizer = AutoTokenizer.from_pretrained(model_name)
+ # Configure 4-bit quantization to fit a 16GB GPU
+ quantization_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.float16,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True
+ )
+
+ # Load the DeepSeek-Coder-6.7B-Instruct model
+ model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
  model = AutoModelForCausalLM.from_pretrained(
      model_name,
-     torch_dtype=torch.float16,  # half precision to reduce VRAM
-     device_map="auto",  # map automatically onto the GPU
-     trust_remote_code=True  # may be needed for DeepSeek models
+     quantization_config=quantization_config,
+     device_map="auto",
+     trust_remote_code=True,
+     low_cpu_mem_usage=True
  )

  def respond(
@@ -30,7 +39,7 @@ def respond(

      messages.append({"role": "user", "content": message})

-     # Format the input
+     # Format the input with the chat template
      input_text = tokenizer.apply_chat_template(messages, tokenize=False)
      inputs = tokenizer(input_text, return_tensors="pt").to("cuda")

@@ -41,11 +50,12 @@ def respond(
          temperature=temperature,
          top_p=top_p,
          do_sample=True,
+         pad_token_id=tokenizer.eos_token_id
      )
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
      yield response

  # Gradio interface
  demo = gr.ChatInterface(
      respond,
      additional_inputs=[
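
For reference, a minimal standalone sketch of the 4-bit loading path this commit switches to, with the rough memory arithmetic behind the "fit a 16GB GPU" comment spelled out. The config values and model ID mirror the diff; the smoke test at the end is a hypothetical usage example, and it assumes bitsandbytes is installed in the Space runtime.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Rough weight-memory arithmetic for a 6.7B-parameter model:
#   fp16: 6.7e9 params * 2 bytes  ~= 13.4 GB -- too tight on a 16 GB GPU once
#         the KV cache and activations are added (hence the original OOM risk).
#   nf4:  6.7e9 params * 0.5 byte ~= 3.4 GB of weights, leaving ample headroom.
model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store weights in 4-bit
    bnb_4bit_compute_dtype=torch.float16,  # dequantize to fp16 for matmuls
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,        # also quantize the quant constants
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)

# Hypothetical smoke test of the same chat-template path respond() uses.
messages = [{"role": "user", "content": "Write a Python hello world."}]
input_text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs, max_new_tokens=64, pad_token_id=tokenizer.eos_token_id
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

NF4 with double quantization is the QLoRA loading recipe: compared with the fp16 load it trades a small amount of generation quality and speed for roughly a 4x cut in weight memory.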