abcd66666 committed on
Commit
72ad303
·
verified ·
1 Parent(s): a390b9a

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +7 -15
  2. requirements.txt +5 -5
app.py CHANGED
@@ -1,22 +1,14 @@
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
3
  import torch
4
 
5
- # 配置 4-bit 量化以适配 16GB GPU
6
- quantization_config = BitsAndBytesConfig(
7
- load_in_4bit=True,
8
- bnb_4bit_compute_dtype=torch.float16,
9
- bnb_4bit_quant_type="nf4",
10
- bnb_4bit_use_double_quant=True
11
- )
12
-
13
- # 加载 DeepSeek-Coder-6.7B-Instruct 模型
14
- model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
15
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
16
  model = AutoModelForCausalLM.from_pretrained(
17
  model_name,
18
- quantization_config=quantization_config,
19
- device_map="auto",
20
  trust_remote_code=True,
21
  low_cpu_mem_usage=True
22
  )
@@ -41,7 +33,7 @@ def respond(
41
 
42
  # 使用聊天模板格式化输入
43
  input_text = tokenizer.apply_chat_template(messages, tokenize=False)
44
- inputs = tokenizer(input_text, return_tensors="pt").to("cuda")
45
 
46
  # 生成响应
47
  outputs = model.generate(
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
 
5
+ # 加载轻量模型(CPU 兼容)
6
+ tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-coder-1.3b-base")
7
+ model = AutoModelForCausalLM.from_pretrained("deepseek-ai/deepseek-coder-1.3b-base")
 
 
 
 
 
 
 
 
8
  model = AutoModelForCausalLM.from_pretrained(
9
  model_name,
10
+ torch_dtype=torch.float16, # 使用 FP16 减少内存
11
+ device_map="cpu", # 强制使用 CPU
12
  trust_remote_code=True,
13
  low_cpu_mem_usage=True
14
  )
 
33
 
34
  # 使用聊天模板格式化输入
35
  input_text = tokenizer.apply_chat_template(messages, tokenize=False)
36
+ inputs = tokenizer(input_text, return_tensors="pt").to("cpu")
37
 
38
  # 生成响应
39
  outputs = model.generate(
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
- torch
2
- transformers
3
- accelerate
4
- gradio
5
- bitsandbytes # Optional, for 4-bit quantization
 
1
+ torch>=2.0.0
2
+ transformers>=4.38.0
3
+ accelerate>=0.26.0
4
+ gradio>=4.0.0
5
+ bitsandbytes>=0.43.3